LLVM 23.0.0git
InstCombineCasts.cpp
Go to the documentation of this file.
1//===- InstCombineCasts.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for cast operations.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/DebugInfo.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Type.h"
26#include "llvm/IR/Value.h"
29#include <iterator>
30#include <optional>
31
32using namespace llvm;
33using namespace PatternMatch;
34
35#define DEBUG_TYPE "instcombine"
36
38
41 EvaluatedMap &Processed) {
42 // Since we cover transformation of instructions with multiple users, we might
43 // come to the same node via multiple paths. We should not create a
44 // replacement for every single one of them though.
45 if (Value *Result = Processed.lookup(V))
46 return Result;
47
50
51 // Otherwise, it must be an instruction.
53 Instruction *Res = nullptr;
54 unsigned Opc = I->getOpcode();
55 switch (Opc) {
56 case Instruction::Add:
57 case Instruction::Sub:
58 case Instruction::Mul:
59 case Instruction::And:
60 case Instruction::Or:
61 case Instruction::Xor:
62 case Instruction::AShr:
63 case Instruction::LShr:
64 case Instruction::Shl:
65 case Instruction::UDiv:
66 case Instruction::URem: {
67 Value *LHS = EvaluateInDifferentTypeImpl(I->getOperand(0), Ty, isSigned, IC,
68 Processed);
69 Value *RHS = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned, IC,
70 Processed);
72 if (Opc == Instruction::LShr || Opc == Instruction::AShr)
73 Res->setIsExact(I->isExact());
74 break;
75 }
76 case Instruction::Trunc:
77 case Instruction::ZExt:
78 case Instruction::SExt:
79 // If the source type of the cast is the type we're trying for then we can
80 // just return the source. There's no need to insert it because it is not
81 // new.
82 if (I->getOperand(0)->getType() == Ty)
83 return I->getOperand(0);
84
85 // Otherwise, must be the same type of cast, so just reinsert a new one.
86 // This also handles the case of zext(trunc(x)) -> zext(x).
87 Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
88 Opc == Instruction::SExt);
89 break;
90 case Instruction::Select: {
91 Value *True = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned,
92 IC, Processed);
93 Value *False = EvaluateInDifferentTypeImpl(I->getOperand(2), Ty, isSigned,
94 IC, Processed);
95 Res = SelectInst::Create(I->getOperand(0), True, False);
96 break;
97 }
98 case Instruction::PHI: {
99 PHINode *OPN = cast<PHINode>(I);
101 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
103 isSigned, IC, Processed);
104 NPN->addIncoming(V, OPN->getIncomingBlock(i));
105 }
106 Res = NPN;
107 break;
108 }
109 case Instruction::FPToUI:
110 case Instruction::FPToSI:
111 Res = CastInst::Create(static_cast<Instruction::CastOps>(Opc),
112 I->getOperand(0), Ty);
113 break;
114 case Instruction::Call:
116 switch (II->getIntrinsicID()) {
117 default:
118 llvm_unreachable("Unsupported call!");
119 case Intrinsic::vscale: {
121 I->getModule(), Intrinsic::vscale, {Ty});
122 Res = CallInst::Create(Fn->getFunctionType(), Fn);
123 break;
124 }
125 case Intrinsic::umin:
126 case Intrinsic::umax:
127 case Intrinsic::smin:
128 case Intrinsic::smax: {
129 Value *Op0 = EvaluateInDifferentTypeImpl(II->getArgOperand(0), Ty,
130 isSigned, IC, Processed);
131 Value *Op1 = EvaluateInDifferentTypeImpl(II->getArgOperand(1), Ty,
132 isSigned, IC, Processed);
134 I->getModule(), II->getIntrinsicID(), {Ty});
135 Res = CallInst::Create(Fn->getFunctionType(), Fn, {Op0, Op1});
136 break;
137 }
138 case Intrinsic::abs: {
139 Value *Arg = EvaluateInDifferentTypeImpl(II->getArgOperand(0), Ty,
140 isSigned, IC, Processed);
142 I->getModule(), II->getIntrinsicID(), {Ty});
143 Res = CallInst::Create(Fn->getFunctionType(), Fn,
144 {Arg, ConstantInt::getFalse(I->getContext())});
145 break;
146 }
147 }
148 }
149 break;
150 case Instruction::ShuffleVector: {
151 auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
152 auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
153 auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
154 Value *Op0 = EvaluateInDifferentTypeImpl(I->getOperand(0), FixedTy,
155 isSigned, IC, Processed);
156 Value *Op1 = EvaluateInDifferentTypeImpl(I->getOperand(1), FixedTy,
157 isSigned, IC, Processed);
158 Res = new ShuffleVectorInst(Op0, Op1,
159 cast<ShuffleVectorInst>(I)->getShuffleMask());
160 break;
161 }
162 default:
163 // TODO: Can handle more cases here.
164 llvm_unreachable("Unreachable!");
165 }
166
167 Res->takeName(I);
168 Value *Result = IC.InsertNewInstWith(Res, I->getIterator());
169 // There is no need in keeping track of the old value/new value relationship
170 // when we have only one user, we came have here from that user and no-one
171 // else cares.
172 if (!V->hasOneUse())
173 Processed[V] = Result;
174
175 return Result;
176}
177
178/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
179/// true for, actually insert the code to evaluate the expression.
181 bool isSigned) {
182 EvaluatedMap Processed;
183 return EvaluateInDifferentTypeImpl(V, Ty, isSigned, *this, Processed);
184}
185
187InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
188 const CastInst *CI2) {
189 Type *SrcTy = CI1->getSrcTy();
190 Type *MidTy = CI1->getDestTy();
191 Type *DstTy = CI2->getDestTy();
192
193 Instruction::CastOps firstOp = CI1->getOpcode();
194 Instruction::CastOps secondOp = CI2->getOpcode();
195 Type *SrcIntPtrTy =
196 SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
197 Type *DstIntPtrTy =
198 DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
199 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
200 DstTy, &DL);
201
202 // We don't want to form an inttoptr or ptrtoint that converts to an integer
203 // type that differs from the pointer size.
204 if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
205 (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
206 Res = 0;
207
208 return Instruction::CastOps(Res);
209}
210
211/// Implement the transforms common to all CastInst visitors.
213 Value *Src = CI.getOperand(0);
214 Type *Ty = CI.getType();
215
216 if (Value *Res =
217 simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
218 return replaceInstUsesWith(CI, Res);
219
220 // Try to eliminate a cast of a cast.
221 if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
222 if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
223 // The first cast (CSrc) is eliminable so we need to fix up or replace
224 // the second cast (CI). CSrc will then have a good chance of being dead.
225 auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
226 // Point debug users of the dying cast to the new one.
227 if (CSrc->hasOneUse())
228 replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
229 return Res;
230 }
231 }
232
233 if (auto *Sel = dyn_cast<SelectInst>(Src)) {
234 // We are casting a select. Try to fold the cast into the select if the
235 // select does not have a compare instruction with matching operand types
236 // or the select is likely better done in a narrow type.
237 // Creating a select with operands that are different sizes than its
238 // condition may inhibit other folds and lead to worse codegen.
239 auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
240 if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
241 (CI.getOpcode() == Instruction::Trunc &&
242 shouldChangeType(CI.getSrcTy(), CI.getType()))) {
243
244 // If it's a bitcast involving vectors, make sure it has the same number
245 // of elements on both sides.
246 if (CI.getOpcode() != Instruction::BitCast ||
248 if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
249 replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
250 return NV;
251 }
252 }
253 }
254 }
255
256 // If we are casting a PHI, then fold the cast into the PHI.
257 if (auto *PN = dyn_cast<PHINode>(Src)) {
258 // Don't do this if it would create a PHI node with an illegal type from a
259 // legal type.
260 if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
261 shouldChangeType(CI.getSrcTy(), CI.getType()))
262 if (Instruction *NV = foldOpIntoPhi(CI, PN))
263 return NV;
264 }
265
266 // Canonicalize a unary shuffle after the cast if neither operation changes
267 // the size or element size of the input vector.
268 // TODO: We could allow size-changing ops if that doesn't harm codegen.
269 // cast (shuffle X, Mask) --> shuffle (cast X), Mask
270 Value *X;
271 ArrayRef<int> Mask;
272 if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))) {
273 // TODO: Allow scalable vectors?
274 auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
275 auto *DestTy = dyn_cast<FixedVectorType>(Ty);
276 if (SrcTy && DestTy &&
277 SrcTy->getNumElements() == DestTy->getNumElements() &&
278 SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
279 Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
280 return new ShuffleVectorInst(CastX, Mask);
281 }
282 }
283
284 return nullptr;
285}
286
287namespace {
288
289/// Helper class for evaluating whether a value can be computed in a different
290/// type without changing its value. Used by cast simplification transforms.
291class TypeEvaluationHelper {
292public:
293 /// Return true if we can evaluate the specified expression tree as type Ty
294 /// instead of its larger type, and arrive with the same value.
295 /// This is used by code that tries to eliminate truncates.
296 [[nodiscard]] static bool canEvaluateTruncated(Value *V, Type *Ty,
298 Instruction *CxtI);
299
300 /// Determine if the specified value can be computed in the specified wider
301 /// type and produce the same low bits. If not, return false.
302 [[nodiscard]] static bool canEvaluateZExtd(Value *V, Type *Ty,
303 unsigned &BitsToClear,
305 Instruction *CxtI);
306
307 /// Return true if we can take the specified value and return it as type Ty
308 /// without inserting any new casts and without changing the value of the
309 /// common low bits.
310 [[nodiscard]] static bool canEvaluateSExtd(Value *V, Type *Ty);
311
312private:
313 /// Constants and extensions/truncates from the destination type are always
314 /// free to be evaluated in that type.
315 [[nodiscard]] static bool canAlwaysEvaluateInType(Value *V, Type *Ty);
316
317 /// Check if we traversed all the users of the multi-use values we've seen.
318 [[nodiscard]] bool allPendingVisited() const {
319 return llvm::all_of(Pending,
320 [this](Value *V) { return Visited.contains(V); });
321 }
322
323 /// A generic wrapper for canEvaluate* recursions to inject visitation
324 /// tracking and enforce correct multi-use value evaluations.
325 [[nodiscard]] bool
326 canEvaluate(Value *V, Type *Ty,
327 llvm::function_ref<bool(Value *, Type *Type)> Pred) {
328 if (canAlwaysEvaluateInType(V, Ty))
329 return true;
330
331 auto *I = dyn_cast<Instruction>(V);
332
333 if (I == nullptr)
334 return false;
335
336 // We insert false by default to return false when we encounter user loops.
337 const auto [It, Inserted] = Visited.insert({V, false});
338
339 // There are three possible cases for us having information on this value
340 // in the Visited map:
341 // 1. We properly checked it and concluded that we can evaluate it (true)
342 // 2. We properly checked it and concluded that we can't (false)
343 // 3. We started to check it, but during the recursive traversal we came
344 // back to it.
345 //
346 // For cases 1 and 2, we can safely return the stored result. For case 3, we
347 // can potentially have a situation where we can evaluate recursive user
348 // chains, but that can be quite tricky to do properly and isntead, we
349 // return false.
350 //
351 // In any case, we should return whatever was there in the map to begin
352 // with.
353 if (!Inserted)
354 return It->getSecond();
355
356 // We can easily make a decision about single-user values whether they can
357 // be evaluated in a different type or not, we came from that user. This is
358 // not as simple for multi-user values.
359 //
360 // In general, we have the following case (inverted control-flow, users are
361 // at the top):
362 //
363 // Cast %A
364 // ____|
365 // /
366 // %A = Use %B, %C
367 // ________| |
368 // / |
369 // %B = Use %D |
370 // ________| |
371 // / |
372 // %D = Use %C |
373 // ________|___|
374 // /
375 // %C = ...
376 //
377 // In this case, when we check %A, %B and %D, we are confident that we can
378 // make the decision here and now, since we came from their only users.
379 //
380 // For %C, it is harder. We come there twice, and when we come the first
381 // time, it's hard to tell if we will visit the second user (technically
382 // it's not hard, but we might need a lot of repetitive checks with non-zero
383 // cost).
384 //
385 // In the case above, we are allowed to evaluate %C in different type
386 // because all of it users were part of the traversal.
387 //
388 // In the following case, however, we can't make this conclusion:
389 //
390 // Cast %A
391 // ____|
392 // /
393 // %A = Use %B, %C
394 // ________| |
395 // / |
396 // %B = Use %D |
397 // ________| |
398 // / |
399 // %D = Use %C |
400 // | |
401 // foo(%C) | | <- never traversing foo(%C)
402 // ________|___|
403 // /
404 // %C = ...
405 //
406 // In this case, we still can evaluate %C in a different type, but we'd need
407 // to create a copy of the original %C to be used in foo(%C). Such
408 // duplication might be not profitable.
409 //
410 // For this reason, we collect all users of the mult-user values and mark
411 // them as "pending" and defer this decision to the very end. When we are
412 // done and and ready to have a positive verdict, we should double-check all
413 // of the pending users and ensure that we visited them. allPendingVisited
414 // predicate checks exactly that.
415 if (!I->hasOneUse())
416 llvm::append_range(Pending, I->users());
417
418 const bool Result = Pred(V, Ty);
419 // We have to set result this way and not via It because Pred is recursive
420 // and it is very likely that we grew Visited and invalidated It.
421 Visited[V] = Result;
422 return Result;
423 }
424
425 /// Filter out values that we can not evaluate in the destination type for
426 /// free.
427 [[nodiscard]] bool canNotEvaluateInType(Value *V, Type *Ty);
428
429 [[nodiscard]] bool canEvaluateTruncatedImpl(Value *V, Type *Ty,
430 InstCombinerImpl &IC,
431 Instruction *CxtI);
432 [[nodiscard]] bool canEvaluateTruncatedPred(Value *V, Type *Ty,
433 InstCombinerImpl &IC,
434 Instruction *CxtI);
435 [[nodiscard]] bool canEvaluateZExtdImpl(Value *V, Type *Ty,
436 unsigned &BitsToClear,
437 InstCombinerImpl &IC,
438 Instruction *CxtI);
439 [[nodiscard]] bool canEvaluateSExtdImpl(Value *V, Type *Ty);
440 [[nodiscard]] bool canEvaluateSExtdPred(Value *V, Type *Ty);
441
442 /// A bookkeeping map to memorize an already made decision for a traversed
443 /// value.
444 SmallDenseMap<Value *, bool, 8> Visited;
445
446 /// A list of pending values to check in the end.
447 SmallVector<Value *, 8> Pending;
448};
449
450} // anonymous namespace
451
452/// Constants and extensions/truncates from the destination type are always
453/// free to be evaluated in that type. This is a helper for canEvaluate*.
454bool TypeEvaluationHelper::canAlwaysEvaluateInType(Value *V, Type *Ty) {
455 if (isa<Constant>(V))
456 return match(V, m_ImmConstant());
457
458 Value *X;
459 if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
460 X->getType() == Ty)
461 return true;
462
463 return false;
464}
465
466/// Filter out values that we can not evaluate in the destination type for free.
467/// This is a helper for canEvaluate*.
468bool TypeEvaluationHelper::canNotEvaluateInType(Value *V, Type *Ty) {
469 if (!isa<Instruction>(V))
470 return true;
471 // We don't extend or shrink something that has multiple uses -- doing so
472 // would require duplicating the instruction which isn't profitable.
473 if (!V->hasOneUse())
474 return true;
475
476 return false;
477}
478
479/// Return true if we can evaluate the specified expression tree as type Ty
480/// instead of its larger type, and arrive with the same value.
481/// This is used by code that tries to eliminate truncates.
482///
483/// Ty will always be a type smaller than V. We should return true if trunc(V)
484/// can be computed by computing V in the smaller type. If V is an instruction,
485/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
486/// makes sense if x and y can be efficiently truncated.
487///
488/// This function works on both vectors and scalars.
489///
490bool TypeEvaluationHelper::canEvaluateTruncated(Value *V, Type *Ty,
492 Instruction *CxtI) {
493 TypeEvaluationHelper TYH;
494 return TYH.canEvaluateTruncatedImpl(V, Ty, IC, CxtI) &&
495 // We need to check whether we visited all users of multi-user values,
496 // and we have to do it at the very end, outside of the recursion.
497 TYH.allPendingVisited();
498}
499
500bool TypeEvaluationHelper::canEvaluateTruncatedImpl(Value *V, Type *Ty,
502 Instruction *CxtI) {
503 return canEvaluate(V, Ty, [this, &IC, CxtI](Value *V, Type *Ty) {
504 return canEvaluateTruncatedPred(V, Ty, IC, CxtI);
505 });
506}
507
508bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value *V, Type *Ty,
510 Instruction *CxtI) {
511 auto *I = cast<Instruction>(V);
512 Type *OrigTy = V->getType();
513 switch (I->getOpcode()) {
514 case Instruction::Add:
515 case Instruction::Sub:
516 case Instruction::Mul:
517 case Instruction::And:
518 case Instruction::Or:
519 case Instruction::Xor:
520 // These operators can all arbitrarily be extended or truncated.
521 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
522 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
523
524 case Instruction::UDiv:
525 case Instruction::URem: {
526 // UDiv and URem can be truncated if all the truncated bits are zero.
527 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
528 uint32_t BitWidth = Ty->getScalarSizeInBits();
529 assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
530 APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
531 // Do not preserve the original context instruction. Simplifying div/rem
532 // based on later context may introduce a trap.
533 if (IC.MaskedValueIsZero(I->getOperand(0), Mask, I) &&
534 IC.MaskedValueIsZero(I->getOperand(1), Mask, I)) {
535 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
536 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
537 }
538 break;
539 }
540 case Instruction::Shl: {
541 // If we are truncating the result of this SHL, and if it's a shift of an
542 // inrange amount, we can always perform a SHL in a smaller type.
543 uint32_t BitWidth = Ty->getScalarSizeInBits();
544 KnownBits AmtKnownBits =
545 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
546 if (AmtKnownBits.getMaxValue().ult(BitWidth))
547 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
548 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
549 break;
550 }
551 case Instruction::LShr: {
552 // If this is a truncate of a logical shr, we can truncate it to a smaller
553 // lshr iff we know that the bits we would otherwise be shifting in are
554 // already zeros.
555 // TODO: It is enough to check that the bits we would be shifting in are
556 // zero - use AmtKnownBits.getMaxValue().
557 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
558 uint32_t BitWidth = Ty->getScalarSizeInBits();
559 KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), CxtI);
560 APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
561 APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
562 if (MaxShiftAmt.ult(BitWidth)) {
563 // If the only user is a trunc then we can narrow the shift if any new
564 // MSBs are not going to be used.
565 if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
566 auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
567 if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
568 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
569 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
570 }
571 if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, CxtI))
572 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
573 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
574 }
575 break;
576 }
577 case Instruction::AShr: {
578 // If this is a truncate of an arithmetic shr, we can truncate it to a
579 // smaller ashr iff we know that all the bits from the sign bit of the
580 // original type and the sign bit of the truncate type are similar.
581 // TODO: It is enough to check that the bits we would be shifting in are
582 // similar to sign bit of the truncate type.
583 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
584 uint32_t BitWidth = Ty->getScalarSizeInBits();
585 KnownBits AmtKnownBits =
586 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
587 unsigned ShiftedBits = OrigBitWidth - BitWidth;
588 if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
589 ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), CxtI))
590 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
591 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
592 break;
593 }
594 case Instruction::Trunc:
595 // trunc(trunc(x)) -> trunc(x)
596 return true;
597 case Instruction::ZExt:
598 case Instruction::SExt:
599 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
600 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
601 return true;
602 case Instruction::Select: {
604 return canEvaluateTruncatedImpl(SI->getTrueValue(), Ty, IC, CxtI) &&
605 canEvaluateTruncatedImpl(SI->getFalseValue(), Ty, IC, CxtI);
606 }
607 case Instruction::PHI: {
608 // We can change a phi if we can change all operands. Note that we never
609 // get into trouble with cyclic PHIs here because canEvaluate handles use
610 // chain loops.
611 PHINode *PN = cast<PHINode>(I);
612 return llvm::all_of(
613 PN->incoming_values(), [this, Ty, &IC, CxtI](Value *IncValue) {
614 return canEvaluateTruncatedImpl(IncValue, Ty, IC, CxtI);
615 });
616 }
617 case Instruction::FPToUI:
618 case Instruction::FPToSI: {
619 // If the integer type can hold the max FP value, it is safe to cast
620 // directly to that type. Otherwise, we may create poison via overflow
621 // that did not exist in the original code.
622 Type *InputTy = I->getOperand(0)->getType()->getScalarType();
623 const fltSemantics &Semantics = InputTy->getFltSemantics();
624 uint32_t MinBitWidth = APFloatBase::semanticsIntSizeInBits(
625 Semantics, I->getOpcode() == Instruction::FPToSI);
626 return Ty->getScalarSizeInBits() >= MinBitWidth;
627 }
628 case Instruction::ShuffleVector:
629 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
630 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
631
632 case Instruction::Call: {
633 Value *AbsOp;
635 if (IC.ComputeMaxSignificantBits(AbsOp, CxtI) > Ty->getScalarSizeInBits())
636 return false;
637 return canEvaluateTruncatedImpl(AbsOp, Ty, IC, CxtI);
638 }
639 auto *MM = dyn_cast<MinMaxIntrinsic>(I);
640 if (!MM)
641 return false;
642 // The min/max can be performed in the narrow type when each operand has
643 // zero high bits (for umin/umax) or enough sign bits (for smin/smax).
644 Value *Op0 = MM->getLHS();
645 Value *Op1 = MM->getRHS();
646 uint32_t BitWidth = Ty->getScalarSizeInBits();
647 if (MM->isSigned()) {
648 if (IC.ComputeMaxSignificantBits(Op0, CxtI) > BitWidth ||
649 IC.ComputeMaxSignificantBits(Op1, CxtI) > BitWidth)
650 break;
651 } else {
652 APInt Mask =
654 if (!IC.MaskedValueIsZero(Op0, Mask, CxtI) ||
655 !IC.MaskedValueIsZero(Op1, Mask, CxtI))
656 break;
657 }
658 return canEvaluateTruncatedImpl(Op0, Ty, IC, CxtI) &&
659 canEvaluateTruncatedImpl(Op1, Ty, IC, CxtI);
660 }
661 default:
662 // TODO: Can handle more cases here.
663 break;
664 }
665
666 return false;
667}
668
669/// Given a vector that is bitcast to an integer, optionally logically
670/// right-shifted, and truncated, convert it to an extractelement.
671/// Example (big endian):
672/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
673/// --->
674/// extractelement <4 x i32> %X, 1
676 InstCombinerImpl &IC) {
677 Value *TruncOp = Trunc.getOperand(0);
678 Type *DestType = Trunc.getType();
679 if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
680 return nullptr;
681
682 Value *VecInput = nullptr;
683 ConstantInt *ShiftVal = nullptr;
684 if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
685 m_LShr(m_BitCast(m_Value(VecInput)),
686 m_ConstantInt(ShiftVal)))) ||
687 !isa<VectorType>(VecInput->getType()))
688 return nullptr;
689
690 VectorType *VecType = cast<VectorType>(VecInput->getType());
691 unsigned VecWidth = VecType->getPrimitiveSizeInBits();
692 unsigned DestWidth = DestType->getPrimitiveSizeInBits();
693 unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
694
695 if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
696 return nullptr;
697
698 // If the element type of the vector doesn't match the result type,
699 // bitcast it to a vector type that we can extract from.
700 unsigned NumVecElts = VecWidth / DestWidth;
701 if (VecType->getElementType() != DestType) {
702 VecType = FixedVectorType::get(DestType, NumVecElts);
703 VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
704 }
705
706 unsigned Elt = ShiftAmount / DestWidth;
707 if (IC.getDataLayout().isBigEndian())
708 Elt = NumVecElts - 1 - Elt;
709
710 return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
711}
712
713/// Whenever an element is extracted from a vector, optionally shifted down, and
714/// then truncated, canonicalize by converting it to a bitcast followed by an
715/// extractelement.
716///
717/// Examples (little endian):
718/// trunc (extractelement <4 x i64> %X, 0) to i32
719/// --->
720/// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
721///
722/// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
723/// --->
724/// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
726 InstCombinerImpl &IC) {
727 Value *Src = Trunc.getOperand(0);
728 Type *SrcType = Src->getType();
729 Type *DstType = Trunc.getType();
730
731 // Only attempt this if we have simple aliasing of the vector elements.
732 // A badly fit destination size would result in an invalid cast.
733 unsigned SrcBits = SrcType->getScalarSizeInBits();
734 unsigned DstBits = DstType->getScalarSizeInBits();
735 unsigned TruncRatio = SrcBits / DstBits;
736 if ((SrcBits % DstBits) != 0)
737 return nullptr;
738
739 Value *VecOp;
740 ConstantInt *Cst;
741 const APInt *ShiftAmount = nullptr;
742 if (!match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)))) &&
743 !match(Src,
745 m_APInt(ShiftAmount)))))
746 return nullptr;
747
748 auto *VecOpTy = cast<VectorType>(VecOp->getType());
749 auto VecElts = VecOpTy->getElementCount();
750
751 uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
752 // Make sure we don't overflow in the calculation of the new index.
753 // (VecOpIdx + 1) * TruncRatio should not overflow.
754 if (Cst->uge(std::numeric_limits<uint64_t>::max() / TruncRatio))
755 return nullptr;
756 uint64_t VecOpIdx = Cst->getZExtValue();
757 uint64_t NewIdx = IC.getDataLayout().isBigEndian()
758 ? (VecOpIdx + 1) * TruncRatio - 1
759 : VecOpIdx * TruncRatio;
760
761 // Adjust index by the whole number of truncated elements.
762 if (ShiftAmount) {
763 // Check shift amount is in range and shifts a whole number of truncated
764 // elements.
765 if (ShiftAmount->uge(SrcBits) || ShiftAmount->urem(DstBits) != 0)
766 return nullptr;
767
768 uint64_t IdxOfs = ShiftAmount->udiv(DstBits).getZExtValue();
769 // IdxOfs is guaranteed to be less than TruncRatio, so we won't overflow in
770 // the adjustment.
771 assert(IdxOfs < TruncRatio &&
772 "IdxOfs is expected to be less than TruncRatio.");
773 NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
774 : (NewIdx + IdxOfs);
775 }
776
777 assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
778 "overflow 32-bits");
779
780 auto *BitCastTo =
781 VectorType::get(DstType, BitCastNumElts, VecElts.isScalable());
782 Value *BitCast = IC.Builder.CreateBitCast(VecOp, BitCastTo);
783 return ExtractElementInst::Create(BitCast, IC.Builder.getInt64(NewIdx));
784}
785
786/// Funnel/Rotate left/right may occur in a wider type than necessary because of
787/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
788Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
789 assert((isa<VectorType>(Trunc.getSrcTy()) ||
790 shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
791 "Don't narrow to an illegal scalar type");
792
793 // Bail out on strange types. It is possible to handle some of these patterns
794 // even with non-power-of-2 sizes, but it is not a likely scenario.
795 Type *DestTy = Trunc.getType();
796 unsigned NarrowWidth = DestTy->getScalarSizeInBits();
797 unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
798 if (!isPowerOf2_32(NarrowWidth))
799 return nullptr;
800
801 // First, find an or'd pair of opposite shifts:
802 // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
803 BinaryOperator *Or0, *Or1;
804 if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
805 return nullptr;
806
807 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
808 if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
809 !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
810 Or0->getOpcode() == Or1->getOpcode())
811 return nullptr;
812
813 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
814 if (Or0->getOpcode() == BinaryOperator::LShr) {
815 std::swap(Or0, Or1);
816 std::swap(ShVal0, ShVal1);
817 std::swap(ShAmt0, ShAmt1);
818 }
819 assert(Or0->getOpcode() == BinaryOperator::Shl &&
820 Or1->getOpcode() == BinaryOperator::LShr &&
821 "Illegal or(shift,shift) pair");
822
823 // Match the shift amount operands for a funnel/rotate pattern. This always
824 // matches a subtraction on the R operand.
825 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
826 // The shift amounts may add up to the narrow bit width:
827 // (shl ShVal0, L) | (lshr ShVal1, Width - L)
828 // If this is a funnel shift (different operands are shifted), then the
829 // shift amount can not over-shift (create poison) in the narrow type.
830 unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
831 APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
832 if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
833 if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
834 return L;
835
836 // The following patterns currently only work for rotation patterns.
837 // TODO: Add more general funnel-shift compatible patterns.
838 if (ShVal0 != ShVal1)
839 return nullptr;
840
841 // The shift amount may be masked with negation:
842 // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
843 Value *X;
844 unsigned Mask = Width - 1;
845 if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
847 return X;
848
849 // Same as above, but the shift amount may be extended after masking:
850 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
852 return X;
853
854 return nullptr;
855 };
856
857 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
858 bool IsFshl = true; // Sub on LSHR.
859 if (!ShAmt) {
860 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
861 IsFshl = false; // Sub on SHL.
862 }
863 if (!ShAmt)
864 return nullptr;
865
866 // The right-shifted value must have high zeros in the wide type (for example
867 // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
868 // truncated, so those do not matter.
869 APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
870 if (!MaskedValueIsZero(ShVal1, HiBitMask, &Trunc))
871 return nullptr;
872
873 // Adjust the width of ShAmt for narrowed funnel shift operation:
874 // - Zero-extend if ShAmt is narrower than the destination type.
875 // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
876 // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
877 // zext/trunc(ShAmt)).
878 Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);
879
880 Value *X, *Y;
881 X = Y = Builder.CreateTrunc(ShVal0, DestTy);
882 if (ShVal0 != ShVal1)
883 Y = Builder.CreateTrunc(ShVal1, DestTy);
884 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
885 Function *F =
886 Intrinsic::getOrInsertDeclaration(Trunc.getModule(), IID, DestTy);
887 return CallInst::Create(F, {X, Y, NarrowShAmt});
888}
889
890/// Try to narrow the width of math or bitwise logic instructions by pulling a
891/// truncate ahead of binary operators.
892Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
893 Type *SrcTy = Trunc.getSrcTy();
894 Type *DestTy = Trunc.getType();
895 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
896 unsigned DestWidth = DestTy->getScalarSizeInBits();
897
898 if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
899 return nullptr;
900
901 BinaryOperator *BinOp;
902 if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
903 return nullptr;
904
905 Value *BinOp0 = BinOp->getOperand(0);
906 Value *BinOp1 = BinOp->getOperand(1);
907 switch (BinOp->getOpcode()) {
908 case Instruction::And:
909 case Instruction::Or:
910 case Instruction::Xor:
911 case Instruction::Add:
912 case Instruction::Sub:
913 case Instruction::Mul: {
914 Constant *C;
915 if (match(BinOp0, m_Constant(C))) {
916 // trunc (binop C, X) --> binop (trunc C', X)
917 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
918 Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
919 return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
920 }
921 if (match(BinOp1, m_Constant(C))) {
922 // trunc (binop X, C) --> binop (trunc X, C')
923 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
924 Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
925 return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
926 }
927 Value *X;
928 if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
929 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
930 Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
931 return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
932 }
933 if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
934 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
935 Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
936 return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
937 }
938 break;
939 }
940 case Instruction::LShr:
941 case Instruction::AShr: {
942 // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
943 Value *A;
944 Constant *C;
945 if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
946 unsigned MaxShiftAmt = SrcWidth - DestWidth;
947 // If the shift is small enough, all zero/sign bits created by the shift
948 // are removed by the trunc.
950 APInt(SrcWidth, MaxShiftAmt)))) {
951 auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
952 bool IsExact = OldShift->isExact();
953 if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
954 /*IsSigned*/ true, DL)) {
955 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
956 Value *Shift =
957 OldShift->getOpcode() == Instruction::AShr
958 ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
959 : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
960 return CastInst::CreateTruncOrBitCast(Shift, DestTy);
961 }
962 }
963 }
964 break;
965 }
966 default: break;
967 }
968
969 if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
970 return NarrowOr;
971
972 return nullptr;
973}
974
975/// Try to narrow the width of a splat shuffle. This could be generalized to any
976/// shuffle with a constant operand, but we limit the transform to avoid
977/// creating a shuffle type that targets may not be able to lower effectively.
979 InstCombiner::BuilderTy &Builder) {
980 Value *Shuf = Trunc.getOperand(0), *ShufVec;
981 ArrayRef<int> SplatMask;
982 if (match(Shuf, m_OneUse(m_Shuffle(m_Value(ShufVec), m_Poison(),
983 m_Mask(SplatMask)))) &&
984 match(SplatMask, m_SplatMask()) &&
986 cast<VectorType>(Shuf->getType())->getElementCount(),
987 cast<VectorType>(ShufVec->getType())->getElementCount())) {
988 // trunc (shuf X, poison, SplatMask) --> shuf (trunc X), poison, SplatMask
989 Type *NewTruncTy =
990 ShufVec->getType()->getWithNewType(Trunc.getType()->getScalarType());
991 Value *NarrowOp = Builder.CreateTrunc(ShufVec, NewTruncTy);
992 return new ShuffleVectorInst(NarrowOp, SplatMask);
993 }
994
995 return nullptr;
996}
997
998/// Try to narrow the width of an insert element. This could be generalized for
999/// any vector constant, but we limit the transform to insertion into poison to
1000/// avoid potential backend problems from unsupported insertion widths. This
1001/// could also be extended to handle the case of inserting a scalar constant
1002/// into a vector variable.
1004 InstCombiner::BuilderTy &Builder) {
1005 Instruction::CastOps Opcode = Trunc.getOpcode();
1006 assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1007 "Unexpected instruction for shrinking");
1008
1009 Value *Elt, *Index;
1010 if (match(Trunc.getOperand(0),
1011 m_OneUse(m_InsertElt(m_Poison(), m_Value(Elt), m_Value(Index))))) {
1012 // trunc (inselt poison, X, Index) --> inselt poison, (trunc X), Index
1013 // fptrunc (inselt poison, X, Index) --> inselt poison, (fptrunc X), Index
1014 auto *NarrowPoison = PoisonValue::get(Trunc.getType());
1015 Value *NarrowOp =
1016 Builder.CreateCast(Opcode, Elt, Trunc.getType()->getScalarType());
1017 return InsertElementInst::Create(NarrowPoison, NarrowOp, Index);
1018 }
1019
1020 return nullptr;
1021}
1022
1024 if (Instruction *Result = commonCastTransforms(Trunc))
1025 return Result;
1026
1027 Value *Src = Trunc.getOperand(0);
1028 Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
1029 unsigned DestWidth = DestTy->getScalarSizeInBits();
1030 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
1031
1032 // Attempt to truncate the entire input expression tree to the destination
1033 // type. Only do this if the dest type is a simple type, don't convert the
1034 // expression tree to something weird like i93 unless the source is also
1035 // strange.
1036 if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
1037 TypeEvaluationHelper::canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
1038
1039 // If this cast is a truncate, evaluting in a different type always
1040 // eliminates the cast, so it is always a win.
1041 LLVM_DEBUG(
1042 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1043 " to avoid cast: "
1044 << Trunc << '\n');
1045 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1046 assert(Res->getType() == DestTy);
1047 return replaceInstUsesWith(Trunc, Res);
1048 }
1049
1050 // For integer types, check if we can shorten the entire input expression to
1051 // DestWidth * 2, which won't allow removing the truncate, but reducing the
1052 // width may enable further optimizations, e.g. allowing for larger
1053 // vectorization factors.
1054 if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
1055 if (DestWidth * 2 < SrcWidth) {
1056 auto *NewDestTy = DestITy->getExtendedType();
1057 if (shouldChangeType(SrcTy, NewDestTy) &&
1058 TypeEvaluationHelper::canEvaluateTruncated(Src, NewDestTy, *this,
1059 &Trunc)) {
1060 LLVM_DEBUG(
1061 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1062 " to reduce the width of operand of"
1063 << Trunc << '\n');
1064 Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
1065 return new TruncInst(Res, DestTy);
1066 }
1067 }
1068 }
1069
1070 // See if we can simplify any instructions used by the input whose sole
1071 // purpose is to compute bits we don't care about.
1073 return &Trunc;
1074
1075 if (DestWidth == 1) {
1076 Value *Zero = Constant::getNullValue(SrcTy);
1077
1078 Value *X;
1079 const APInt *C1;
1080 Constant *C2;
1081 if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
1082 m_ImmConstant(C2))))) {
1083 // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
1084 Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
1085 Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
1086 return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
1087 }
1088
1089 if (match(Src, m_Shr(m_Value(X), m_SpecificInt(SrcWidth - 1)))) {
1090 // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
1091 // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
1092 return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero);
1093 }
1094
1095 Constant *C;
1096 if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
1097 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
1098 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1099 Value *MaskC = Builder.CreateShl(One, C);
1100 Value *And = Builder.CreateAnd(X, MaskC);
1101 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1102 }
1104 m_Deferred(X))))) {
1105 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
1106 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1107 Value *MaskC = Builder.CreateShl(One, C);
1108 Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
1109 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1110 }
1111
1112 {
1113 const APInt *C;
1114 if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
1115 // trunc (C << X) to i1 --> X == 0, where C is odd
1116 return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
1117 }
1118 }
1119
1120 if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
1121 Value *X, *Y;
1122 if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
1123 return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
1124 }
1125
1126 if (match(Src,
1128 return new ICmpInst(ICmpInst::ICMP_EQ, X,
1130 }
1131
1132 Value *A, *B;
1133 Constant *C;
1134
1135 // trunc(u/smin(zext(a) + zext(b), MAX)) --> uadd.sat(a, b)
1136 if (match(Src,
1139 m_SpecificInt(APInt::getMaxValue(DestWidth))),
1141 m_SpecificInt(APInt::getMaxValue(DestWidth)))))) &&
1142 A->getType() == DestTy && B->getType() == DestTy) {
1143 return replaceInstUsesWith(
1144 Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, A, B));
1145 }
1146
1147 // trunc(smax(zext(a) - zext(b), 0)) --> usub.sat(a, b)
1148 if (match(Src, m_OneUse(m_SMax(
1150 m_Zero()))) &&
1151 A->getType() == DestTy && B->getType() == DestTy) {
1152 return replaceInstUsesWith(
1153 Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B));
1154 }
1155
1156 if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
1157 unsigned AWidth = A->getType()->getScalarSizeInBits();
1158 unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
1159 auto *OldSh = cast<Instruction>(Src);
1160 bool IsExact = OldSh->isExact();
1161
1162 // If the shift is small enough, all zero bits created by the shift are
1163 // removed by the trunc.
1165 APInt(SrcWidth, MaxShiftAmt)))) {
1166 auto GetNewShAmt = [&](unsigned Width) {
1167 Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
1168 Constant *Cmp =
1170 Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
1171 return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
1172 DL);
1173 };
1174
1175 // trunc (lshr (sext A), C) --> ashr A, C
1176 if (A->getType() == DestTy) {
1177 Constant *ShAmt = GetNewShAmt(DestWidth);
1178 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
1179 return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
1180 : BinaryOperator::CreateAShr(A, ShAmt);
1181 }
1182 // The types are mismatched, so create a cast after shifting:
1183 // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
1184 if (Src->hasOneUse()) {
1185 Constant *ShAmt = GetNewShAmt(AWidth);
1186 Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
1187 return CastInst::CreateIntegerCast(Shift, DestTy, true);
1188 }
1189 }
1190 // TODO: Mask high bits with 'and'.
1191 }
1192
1193 if (Instruction *I = narrowBinOp(Trunc))
1194 return I;
1195
1196 if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
1197 return I;
1198
1199 if (Instruction *I = shrinkInsertElt(Trunc, Builder))
1200 return I;
1201
1202 if (Src->hasOneUse() &&
1203 (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
1204 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
1205 // dest type is native and cst < dest size.
1206 if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
1207 !match(A, m_Shr(m_Value(), m_Constant()))) {
1208 // Skip shifts of shift by constants. It undoes a combine in
1209 // FoldShiftByConstant and is the extend in reg pattern.
1210 APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
1211 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
1212 Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
1213 return BinaryOperator::Create(Instruction::Shl, NewTrunc,
1214 ConstantExpr::getTrunc(C, DestTy));
1215 }
1216 }
1217 }
1218
1219 // trunc (select(icmp_ult(A, DestTy_umax+1), A, sext(icmp_sgt(A, 0)))) -->
1220 // trunc (smin(smax(0, A), DestTy_umax))
1221 if (SrcTy->isIntegerTy() && isPowerOf2_64(SrcTy->getPrimitiveSizeInBits()) &&
1223 match(Src, m_OneUse(m_Select(
1225 m_Constant(C))),
1226 m_Deferred(A),
1228 ICmpInst::ICMP_SGT, m_Deferred(A), m_Zero())))))))) {
1229 APInt UpperBound = C->getUniqueInteger();
1230 APInt TruncatedMax = APInt::getAllOnes(DestTy->getIntegerBitWidth());
1231 TruncatedMax = TruncatedMax.zext(UpperBound.getBitWidth());
1232 if (!UpperBound.isZero() && UpperBound - 1 == TruncatedMax) {
1233 Value *SMax = Builder.CreateIntrinsic(Intrinsic::smax, {SrcTy},
1234 {ConstantInt::get(SrcTy, 0), A});
1235 Value *SMin = Builder.CreateIntrinsic(
1236 Intrinsic::smin, {SrcTy},
1237 {SMax, ConstantInt::get(SrcTy, TruncatedMax)});
1238 return new TruncInst(SMin, DestTy);
1239 }
1240 }
1241
1242 if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
1243 return I;
1244
1245 if (Instruction *I = foldVecExtTruncToExtElt(Trunc, *this))
1246 return I;
1247
1248 // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
1249 if (match(Src, m_OneUse(m_Ctlz(m_ZExt(m_Value(A)), m_Value(B))))) {
1250 unsigned AWidth = A->getType()->getScalarSizeInBits();
1251 if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
1252 Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
1253 Value *NarrowCtlz =
1254 Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
1255 return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
1256 }
1257 }
1258
1259 if (match(Src, m_VScale())) {
1260 if (Trunc.getFunction() &&
1261 Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1262 Attribute Attr =
1263 Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
1264 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1265 if (Log2_32(*MaxVScale) < DestWidth)
1266 return replaceInstUsesWith(Trunc, Builder.CreateVScale(DestTy));
1267 }
1268 }
1269
1270 if (DestWidth == 1 &&
1271 (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) &&
1272 isKnownNonZero(Src, SQ.getWithInstruction(&Trunc)))
1273 return replaceInstUsesWith(Trunc, ConstantInt::getTrue(DestTy));
1274
1275 bool Changed = false;
1276 if (!Trunc.hasNoSignedWrap() &&
1277 ComputeMaxSignificantBits(Src, &Trunc) <= DestWidth) {
1278 Trunc.setHasNoSignedWrap(true);
1279 Changed = true;
1280 }
1281 if (!Trunc.hasNoUnsignedWrap() &&
1282 MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
1283 &Trunc)) {
1284 Trunc.setHasNoUnsignedWrap(true);
1285 Changed = true;
1286 }
1287
1288 const APInt *C1;
1289 Value *V1;
1290 // OP = { lshr, ashr }
1291 // trunc ( OP i8 C1, V1) to i1 -> icmp eq V1, log_2(C1) iff C1 is power of 2
1292 if (DestWidth == 1 && match(Src, m_Shr(m_Power2(C1), m_Value(V1)))) {
1293 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1294 return new ICmpInst(ICmpInst::ICMP_EQ, V1, Right);
1295 }
1296
1297 // OP = { lshr, ashr }
1298 // trunc ( OP i8 C1, V1) to i1 -> icmp ult V1, log_2(C1 + 1) iff (C1 + 1) is
1299 // power of 2
1300 if (DestWidth == 1 && match(Src, m_Shr(m_LowBitMask(C1), m_Value(V1)))) {
1301 Value *Right = ConstantInt::get(V1->getType(), C1->countr_one());
1302 return new ICmpInst(ICmpInst::ICMP_ULT, V1, Right);
1303 }
1304
1305 // OP = { lshr, ashr }
1306 // trunc ( OP i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is
1307 // negative power of 2
1308 if (DestWidth == 1 && match(Src, m_Shr(m_NegatedPower2(C1), m_Value(V1)))) {
1309 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1310 return new ICmpInst(ICmpInst::ICMP_UGE, V1, Right);
1311 }
1312
1313 return Changed ? &Trunc : nullptr;
1314}
1315
1316Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
1317 ZExtInst &Zext) {
1318 // If we are just checking for a icmp eq of a single bit and zext'ing it
1319 // to an integer, then shift the bit to the appropriate place and then
1320 // cast to integer to avoid the comparison.
1321
1322 // FIXME: This set of transforms does not check for extra uses and/or creates
1323 // an extra instruction (an optional final cast is not included
1324 // in the transform comments). We may also want to favor icmp over
1325 // shifts in cases of equal instructions because icmp has better
1326 // analysis in general (invert the transform).
1327
1328 const APInt *Op1CV;
1329 if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
1330
1331 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
1332 if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
1333 Value *In = Cmp->getOperand(0);
1334 Value *Sh = ConstantInt::get(In->getType(),
1335 In->getType()->getScalarSizeInBits() - 1);
1336 In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
1337 if (In->getType() != Zext.getType())
1338 In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
1339
1340 return replaceInstUsesWith(Zext, In);
1341 }
1342
1343 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
1344 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1345 // zext (X != 0) to i32 --> X iff X has only the low bit set.
1346 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1347
1348 if (Op1CV->isZero() && Cmp->isEquality()) {
1349 // Exactly 1 possible 1? But not the high-bit because that is
1350 // canonicalized to this form.
1351 KnownBits Known = computeKnownBits(Cmp->getOperand(0), &Zext);
1352 APInt KnownZeroMask(~Known.Zero);
1353 uint32_t ShAmt = KnownZeroMask.logBase2();
1354 bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
1355 (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
1356 if (IsExpectShAmt &&
1357 (Cmp->getOperand(0)->getType() == Zext.getType() ||
1358 Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
1359 Value *In = Cmp->getOperand(0);
1360 if (ShAmt) {
1361 // Perform a logical shr by shiftamt.
1362 // Insert the shift to put the result in the low bit.
1363 In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
1364 In->getName() + ".lobit");
1365 }
1366
1367 // Toggle the low bit for "X == 0".
1368 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1369 In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));
1370
1371 if (Zext.getType() == In->getType())
1372 return replaceInstUsesWith(Zext, In);
1373
1374 Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
1375 return replaceInstUsesWith(Zext, IntCast);
1376 }
1377 }
1378 }
1379
1380 if (Cmp->isEquality()) {
1381 // Test if a bit is clear/set using a shifted-one mask:
1382 // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1383 // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1384 Value *X, *ShAmt;
1385 if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
1386 match(Cmp->getOperand(0),
1387 m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
1388 auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
1389 Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
1390 if (Zext.getType() == And->getType() ||
1391 Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
1392 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1393 X = Builder.CreateNot(X);
1394 Value *Lshr = Builder.CreateLShr(X, ShAmt);
1395 Value *And1 =
1396 Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
1397 return replaceInstUsesWith(
1398 Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
1399 }
1400 }
1401 }
1402
1403 return nullptr;
1404}
1405
1406/// Determine if the specified value can be computed in the specified wider type
1407/// and produce the same low bits. If not, return false.
1408///
1409/// If this function returns true, it can also return a non-zero number of bits
1410/// (in BitsToClear) which indicates that the value it computes is correct for
1411/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1412/// out. For example, to promote something like:
1413///
1414/// %B = trunc i64 %A to i32
1415/// %C = lshr i32 %B, 8
1416/// %E = zext i32 %C to i64
1417///
1418/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1419/// set to 8 to indicate that the promoted value needs to have bits 24-31
1420/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1421/// clear the top bits anyway, doing this has no extra cost.
1422///
1423/// This function works on both vectors and scalars.
1424bool TypeEvaluationHelper::canEvaluateZExtd(Value *V, Type *Ty,
1425 unsigned &BitsToClear,
1426 InstCombinerImpl &IC,
1427 Instruction *CxtI) {
1428 TypeEvaluationHelper TYH;
1429 return TYH.canEvaluateZExtdImpl(V, Ty, BitsToClear, IC, CxtI);
1430}
1431bool TypeEvaluationHelper::canEvaluateZExtdImpl(Value *V, Type *Ty,
1432 unsigned &BitsToClear,
1433 InstCombinerImpl &IC,
1434 Instruction *CxtI) {
1435 BitsToClear = 0;
1436 if (canAlwaysEvaluateInType(V, Ty))
1437 return true;
1438 // We stick to the one-user limit for the ZExt transform due to the fact
1439 // that this predicate returns two values: predicate result and BitsToClear.
1440 if (canNotEvaluateInType(V, Ty))
1441 return false;
1442
1443 auto *I = cast<Instruction>(V);
1444 unsigned Tmp;
1445 switch (I->getOpcode()) {
1446 case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1447 case Instruction::SExt: // zext(sext(x)) -> sext(x).
1448 case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1449 return true;
1450 case Instruction::And:
1451 case Instruction::Or:
1452 case Instruction::Xor:
1453 case Instruction::Add:
1454 case Instruction::Sub:
1455 case Instruction::Mul:
1456 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
1457 !canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI))
1458 return false;
1459 // These can all be promoted if neither operand has 'bits to clear'.
1460 if (BitsToClear == 0 && Tmp == 0)
1461 return true;
1462
1463 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1464 // other side, BitsToClear is ok.
1465 if (Tmp == 0 && I->isBitwiseLogicOp()) {
1466 // We use MaskedValueIsZero here for generality, but the case we care
1467 // about the most is constant RHS.
1468 unsigned VSize = V->getType()->getScalarSizeInBits();
1469 if (IC.MaskedValueIsZero(I->getOperand(1),
1470 APInt::getHighBitsSet(VSize, BitsToClear),
1471 CxtI)) {
1472 // If this is an And instruction and all of the BitsToClear are
1473 // known to be zero we can reset BitsToClear.
1474 if (I->getOpcode() == Instruction::And)
1475 BitsToClear = 0;
1476 return true;
1477 }
1478 }
1479
1480 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1481 return false;
1482
1483 case Instruction::Shl: {
1484 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1485 // upper bits we can reduce BitsToClear by the shift amount.
1486 uint64_t ShiftAmt;
1487 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1488 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1489 return false;
1490 BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1491 return true;
1492 }
1493 return false;
1494 }
1495 case Instruction::LShr: {
1496 // We can promote lshr(x, cst) if we can promote x. This requires the
1497 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1498 uint64_t ShiftAmt;
1499 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1500 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1501 return false;
1502 BitsToClear += ShiftAmt;
1503 if (BitsToClear > V->getType()->getScalarSizeInBits())
1504 BitsToClear = V->getType()->getScalarSizeInBits();
1505 return true;
1506 }
1507 // Cannot promote variable LSHR.
1508 return false;
1509 }
1510 case Instruction::Select:
1511 if (!canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
1512 !canEvaluateZExtdImpl(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
1513 // TODO: If important, we could handle the case when the BitsToClear are
1514 // known zero in the disagreeing side.
1515 Tmp != BitsToClear)
1516 return false;
1517 return true;
1518
1519 case Instruction::PHI: {
1520 // We can change a phi if we can change all operands. Note that we never
1521 // get into trouble with cyclic PHIs here because we only consider
1522 // instructions with a single use.
1523 PHINode *PN = cast<PHINode>(I);
1524 if (!canEvaluateZExtdImpl(PN->getIncomingValue(0), Ty, BitsToClear, IC,
1525 CxtI))
1526 return false;
1527 for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1528 if (!canEvaluateZExtdImpl(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
1529 // TODO: If important, we could handle the case when the BitsToClear
1530 // are known zero in the disagreeing input.
1531 Tmp != BitsToClear)
1532 return false;
1533 return true;
1534 }
1535 case Instruction::Call:
1536 // llvm.vscale() can always be executed in larger type, because the
1537 // value is automatically zero-extended.
1539 if (II->getIntrinsicID() == Intrinsic::vscale)
1540 return true;
1541 return false;
1542 default:
1543 // TODO: Can handle more cases here.
1544 return false;
1545 }
1546}
1547
1549 // If this zero extend is only used by a truncate, let the truncate be
1550 // eliminated before we try to optimize this zext.
1551 if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
1552 !isa<Constant>(Zext.getOperand(0)))
1553 return nullptr;
1554
1555 // If one of the common conversion will work, do it.
1556 if (Instruction *Result = commonCastTransforms(Zext))
1557 return Result;
1558
1559 if (auto *NewI = foldExtractionOfVectorDeinterleave(Zext))
1560 return NewI;
1561
1562 Value *Src = Zext.getOperand(0);
1563 Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
1564
1565 // zext nneg bool x -> 0
1566 if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
1568
1569 // Try to extend the entire expression tree to the wide destination type.
1570 unsigned BitsToClear;
1571 if (shouldChangeType(SrcTy, DestTy) &&
1572 TypeEvaluationHelper::canEvaluateZExtd(Src, DestTy, BitsToClear, *this,
1573 &Zext)) {
1574 assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1575 "Can't clear more bits than in SrcTy");
1576
1577 // Okay, we can transform this! Insert the new expression now.
1578 LLVM_DEBUG(
1579 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1580 " to avoid zero extend: "
1581 << Zext << '\n');
1582 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1583 assert(Res->getType() == DestTy);
1584
1585 // Preserve debug values referring to Src if the zext is its last use.
1586 if (auto *SrcOp = dyn_cast<Instruction>(Src))
1587 if (SrcOp->hasOneUse())
1588 replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
1589
1590 uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1591 uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1592
1593 // If the high bits are already filled with zeros, just replace this
1594 // cast with the result.
1596 Res, APInt::getHighBitsSet(DestBitSize, DestBitSize - SrcBitsKept),
1597 &Zext))
1598 return replaceInstUsesWith(Zext, Res);
1599
1600 // We need to emit an AND to clear the high bits.
1601 Constant *C = ConstantInt::get(Res->getType(),
1602 APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
1603 return BinaryOperator::CreateAnd(Res, C);
1604 }
1605
1606 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1607 // types and if the sizes are just right we can convert this into a logical
1608 // 'and' which will be much cheaper than the pair of casts.
1609 if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
1610 // TODO: Subsume this into EvaluateInDifferentType.
1611
1612 // Get the sizes of the types involved. We know that the intermediate type
1613 // will be smaller than A or C, but don't know the relation between A and C.
1614 Value *A = CSrc->getOperand(0);
1615 unsigned SrcSize = A->getType()->getScalarSizeInBits();
1616 unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1617 unsigned DstSize = DestTy->getScalarSizeInBits();
1618 // If we're actually extending zero bits, then if
1619 // SrcSize < DstSize: zext(a & mask)
1620 // SrcSize == DstSize: a & mask
1621 // SrcSize > DstSize: trunc(a) & mask
1622 if (SrcSize < DstSize) {
1623 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1624 Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
1625 Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
1626 return new ZExtInst(And, DestTy);
1627 }
1628
1629 if (SrcSize == DstSize) {
1630 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1631 return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
1632 AndValue));
1633 }
1634 if (SrcSize > DstSize) {
1635 Value *Trunc = Builder.CreateTrunc(A, DestTy);
1636 APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
1637 return BinaryOperator::CreateAnd(Trunc,
1638 ConstantInt::get(Trunc->getType(),
1639 AndValue));
1640 }
1641 }
1642
1643 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1644 return transformZExtICmp(Cmp, Zext);
1645
1646 // zext(trunc(X) & C) -> (X & zext(C)).
1647 Constant *C;
1648 Value *X;
1649 if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
1650 X->getType() == DestTy)
1651 return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));
1652
1653 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1654 Value *And;
1655 if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
1657 X->getType() == DestTy) {
1658 Value *ZC = Builder.CreateZExt(C, DestTy);
1659 return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
1660 }
1661
1662 // If we are truncating, masking, and then zexting back to the original type,
1663 // that's just a mask. This is not handled by canEvaluateZextd if the
1664 // intermediate values have extra uses. This could be generalized further for
1665 // a non-constant mask operand.
1666 // zext (and (trunc X), C) --> and X, (zext C)
1667 if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
1668 X->getType() == DestTy) {
1669 Value *ZextC = Builder.CreateZExt(C, DestTy);
1670 return BinaryOperator::CreateAnd(X, ZextC);
1671 }
1672
1673 if (match(Src, m_VScale())) {
1674 if (Zext.getFunction() &&
1675 Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1676 Attribute Attr =
1677 Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1678 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1679 unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1680 if (Log2_32(*MaxVScale) < TypeWidth)
1681 return replaceInstUsesWith(Zext, Builder.CreateVScale(DestTy));
1682 }
1683 }
1684 }
1685
1686 if (!Zext.hasNonNeg()) {
1687 // If this zero extend is only used by a shift, add nneg flag.
1688 if (Zext.hasOneUse() &&
1689 SrcTy->getScalarSizeInBits() >
1690 Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
1691 match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
1692 Zext.setNonNeg();
1693 return &Zext;
1694 }
1695
1696 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
1697 Zext.setNonNeg();
1698 return &Zext;
1699 }
1700 }
1701
1702 return nullptr;
1703}
1704
1705/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1706Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
1707 SExtInst &Sext) {
1708 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1709 ICmpInst::Predicate Pred = Cmp->getPredicate();
1710
1711 // Don't bother if Op1 isn't of vector or integer type.
1712 if (!Op1->getType()->isIntOrIntVectorTy())
1713 return nullptr;
1714
1715 if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
1716 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1717 Value *Sh = ConstantInt::get(Op0->getType(),
1718 Op0->getType()->getScalarSizeInBits() - 1);
1719 Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1720 if (In->getType() != Sext.getType())
1721 In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
1722
1723 return replaceInstUsesWith(Sext, In);
1724 }
1725
1726 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1727 // If we know that only one bit of the LHS of the icmp can be set and we
1728 // have an equality comparison with zero or a power of 2, we can transform
1729 // the icmp and sext into bitwise/integer operations.
1730 if (Cmp->hasOneUse() &&
1731 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1732 KnownBits Known = computeKnownBits(Op0, &Sext);
1733
1734 APInt KnownZeroMask(~Known.Zero);
1735 if (KnownZeroMask.isPowerOf2()) {
1736 Value *In = Cmp->getOperand(0);
1737
1738 // If the icmp tests for a known zero bit we can constant fold it.
1739 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1740 Value *V = Pred == ICmpInst::ICMP_NE ?
1742 ConstantInt::getNullValue(Sext.getType());
1743 return replaceInstUsesWith(Sext, V);
1744 }
1745
1746 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1747 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1748 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1749 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1750 // Perform a right shift to place the desired bit in the LSB.
1751 if (ShiftAmt)
1752 In = Builder.CreateLShr(In,
1753 ConstantInt::get(In->getType(), ShiftAmt));
1754
1755 // At this point "In" is either 1 or 0. Subtract 1 to turn
1756 // {1, 0} -> {0, -1}.
1757 In = Builder.CreateAdd(In,
1758 ConstantInt::getAllOnesValue(In->getType()),
1759 "sext");
1760 } else {
1761 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1762 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1763 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1764 // Perform a left shift to place the desired bit in the MSB.
1765 if (ShiftAmt)
1766 In = Builder.CreateShl(In,
1767 ConstantInt::get(In->getType(), ShiftAmt));
1768
1769 // Distribute the bit over the whole bit width.
1770 In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1771 KnownZeroMask.getBitWidth() - 1), "sext");
1772 }
1773
1774 if (Sext.getType() == In->getType())
1775 return replaceInstUsesWith(Sext, In);
1776 return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
1777 }
1778 }
1779 }
1780
1781 return nullptr;
1782}
1783
1784/// Return true if we can take the specified value and return it as type Ty
1785/// without inserting any new casts and without changing the value of the common
1786/// low bits. This is used by code that tries to promote integer operations to
1787/// a wider types will allow us to eliminate the extension.
1788///
1789/// This function works on both vectors and scalars.
1790///
1791bool TypeEvaluationHelper::canEvaluateSExtd(Value *V, Type *Ty) {
1792 TypeEvaluationHelper TYH;
1793 return TYH.canEvaluateSExtdImpl(V, Ty) && TYH.allPendingVisited();
1794}
1795
1796bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
1797 return canEvaluate(V, Ty, [this](Value *V, Type *Ty) {
1798 return canEvaluateSExtdPred(V, Ty);
1799 });
1800}
1801
1802bool TypeEvaluationHelper::canEvaluateSExtdPred(Value *V, Type *Ty) {
1803 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1804 "Can't sign extend type to a smaller type");
1805
1806 auto *I = cast<Instruction>(V);
1807 switch (I->getOpcode()) {
1808 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1809 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1810 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1811 return true;
1812 case Instruction::And:
1813 case Instruction::Or:
1814 case Instruction::Xor:
1815 case Instruction::Add:
1816 case Instruction::Sub:
1817 case Instruction::Mul:
1818 // These operators can all arbitrarily be extended if their inputs can.
1819 return canEvaluateSExtdImpl(I->getOperand(0), Ty) &&
1820 canEvaluateSExtdImpl(I->getOperand(1), Ty);
1821
1822 // case Instruction::Shl: TODO
1823 // case Instruction::LShr: TODO
1824
1825 case Instruction::Select:
1826 return canEvaluateSExtdImpl(I->getOperand(1), Ty) &&
1827 canEvaluateSExtdImpl(I->getOperand(2), Ty);
1828
1829 case Instruction::PHI: {
1830 // We can change a phi if we can change all operands. Note that we never
1831 // get into trouble with cyclic PHIs here because canEvaluate handles use
1832 // chain loops.
1833 PHINode *PN = cast<PHINode>(I);
1834 for (Value *IncValue : PN->incoming_values())
1835 if (!canEvaluateSExtdImpl(IncValue, Ty))
1836 return false;
1837 return true;
1838 }
1839 default:
1840 // TODO: Can handle more cases here.
1841 break;
1842 }
1843
1844 return false;
1845}
1846
1848 // If this sign extend is only used by a truncate, let the truncate be
1849 // eliminated before we try to optimize this sext.
1850 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1851 return nullptr;
1852
1853 if (Instruction *I = commonCastTransforms(Sext))
1854 return I;
1855
1856 Value *Src = Sext.getOperand(0);
1857 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1858 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1859 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1860
1861 // If the value being extended is zero or positive, use a zext instead.
1862 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1863 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1864 CI->setNonNeg(true);
1865 return CI;
1866 }
1867
1868 // Try to extend the entire expression tree to the wide destination type.
1869 bool ShouldExtendExpression = true;
1870 Value *TruncSrc = nullptr;
1871 // It is not desirable to extend expression in the trunc + sext pattern when
1872 // destination type is narrower than original (pre-trunc) type.
1873 if (match(Src, m_Trunc(m_Value(TruncSrc))))
1874 if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
1875 ShouldExtendExpression = false;
1876 if (ShouldExtendExpression && shouldChangeType(SrcTy, DestTy) &&
1877 TypeEvaluationHelper::canEvaluateSExtd(Src, DestTy)) {
1878 // Okay, we can transform this! Insert the new expression now.
1879 LLVM_DEBUG(
1880 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1881 " to avoid sign extend: "
1882 << Sext << '\n');
1883 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1884 assert(Res->getType() == DestTy);
1885
1886 // If the high bits are already filled with sign bit, just replace this
1887 // cast with the result.
1888 if (ComputeNumSignBits(Res, &Sext) > DestBitSize - SrcBitSize)
1889 return replaceInstUsesWith(Sext, Res);
1890
1891 // We need to emit a shl + ashr to do the sign extend.
1892 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1893 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1894 ShAmt);
1895 }
1896
1897 Value *X = TruncSrc;
1898 if (X) {
1899 // If the input has more sign bits than bits truncated, then convert
1900 // directly to final type.
1901 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1902 bool HasNSW = cast<TruncInst>(Src)->hasNoSignedWrap();
1903 if (HasNSW || (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)) {
1904 auto *Res = CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1905 if (auto *ResTrunc = dyn_cast<TruncInst>(Res); ResTrunc && HasNSW)
1906 ResTrunc->setHasNoSignedWrap(true);
1907 return Res;
1908 }
1909
1910 // If input is a trunc from the destination type, then convert into shifts.
1911 if (Src->hasOneUse() && X->getType() == DestTy) {
1912 // sext (trunc X) --> ashr (shl X, C), C
1913 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1914 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1915 }
1916
1917 // If we are replacing shifted-in high zero bits with sign bits, convert
1918 // the logic shift to arithmetic shift and eliminate the cast to
1919 // intermediate type:
1920 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1921 Value *Y;
1922 if (Src->hasOneUse() &&
1924 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1925 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1926 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1927 }
1928 }
1929
1930 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1931 return transformSExtICmp(Cmp, Sext);
1932
1933 // If the input is a shl/ashr pair of a same constant, then this is a sign
1934 // extension from a smaller value. If we could trust arbitrary bitwidth
1935 // integers, we could turn this into a truncate to the smaller bit and then
1936 // use a sext for the whole extension. Since we don't, look deeper and check
1937 // for a truncate. If the source and dest are the same type, eliminate the
1938 // trunc and extend and just do shifts. For example, turn:
1939 // %a = trunc i32 %i to i8
1940 // %b = shl i8 %a, C
1941 // %c = ashr i8 %b, C
1942 // %d = sext i8 %c to i32
1943 // into:
1944 // %a = shl i32 %i, 32-(8-C)
1945 // %d = ashr i32 %a, 32-(8-C)
1946 Value *A = nullptr;
1947 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1948 Constant *BA = nullptr, *CA = nullptr;
1949 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1950 m_ImmConstant(CA))) &&
1951 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1952 Constant *WideCurrShAmt =
1953 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1954 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1955 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1956 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1957 Constant *NewShAmt = ConstantExpr::getSub(
1958 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1959 NumLowbitsLeft);
1960 NewShAmt =
1962 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1963 return BinaryOperator::CreateAShr(A, NewShAmt);
1964 }
1965
1966 // Splatting a bit of constant-index across a value:
1967 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1968 // If the dest type is different, use a cast (adjust use check).
1969 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1970 m_SpecificInt(SrcBitSize - 1))))) {
1971 Type *XTy = X->getType();
1972 unsigned XBitSize = XTy->getScalarSizeInBits();
1973 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1974 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1975 if (XTy == DestTy)
1976 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1977 AshrAmtC);
1978 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1979 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1980 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1981 }
1982 }
1983
1984 if (match(Src, m_VScale())) {
1985 if (Sext.getFunction() &&
1986 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1987 Attribute Attr =
1988 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1989 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1990 if (Log2_32(*MaxVScale) < (SrcBitSize - 1))
1991 return replaceInstUsesWith(Sext, Builder.CreateVScale(DestTy));
1992 }
1993 }
1994
1995 // sext(scmp(x, y)) -> scmp(x, y) with a wider result type.
1996 // sext(ucmp(x, y)) -> ucmp(x, y) with a wider result type.
1997 // scmp/ucmp return only -1, 0, or 1, which sign-extend correctly to any
1998 // wider integer type, so we can sink the extension into the intrinsic.
1999 if (auto *II = dyn_cast<IntrinsicInst>(Src)) {
2000 Intrinsic::ID IID = II->getIntrinsicID();
2001 if ((IID == Intrinsic::scmp || IID == Intrinsic::ucmp) && II->hasOneUse())
2002 return replaceInstUsesWith(
2003 Sext, Builder.CreateIntrinsic(
2004 DestTy, IID, {II->getArgOperand(0), II->getArgOperand(1)}));
2005 }
2006
2007 return nullptr;
2008}
2009
2010/// Return a Constant* for the specified floating-point constant if it fits
2011/// in the specified FP type without changing its value.
2012static bool fitsInFPType(APFloat F, const fltSemantics &Sem) {
2013 bool losesInfo;
2014 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
2015 return !losesInfo;
2016}
2017
2019 bool PreferBFloat) {
2020 // See if the value can be truncated to bfloat and then reextended.
2021 if (PreferBFloat && fitsInFPType(F, APFloat::BFloat()))
2022 return Type::getBFloatTy(Ctx);
2023 // See if the value can be truncated to half and then reextended.
2024 if (!PreferBFloat && fitsInFPType(F, APFloat::IEEEhalf()))
2025 return Type::getHalfTy(Ctx);
2026 // See if the value can be truncated to float and then reextended.
2028 return Type::getFloatTy(Ctx);
2029 if (&F.getSemantics() == &APFloat::IEEEdouble())
2030 return nullptr; // Won't shrink.
2031 // See if the value can be truncated to double and then reextended.
2033 return Type::getDoubleTy(Ctx);
2034 // Don't try to shrink to various long double types.
2035 return nullptr;
2036}
2037
2038static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
2039 Type *Ty = CFP->getType();
2040 if (Ty->getScalarType()->isPPC_FP128Ty())
2041 return nullptr; // No constant folding of this.
2042
2043 Type *ShrinkTy =
2044 shrinkFPConstant(CFP->getContext(), CFP->getValueAPF(), PreferBFloat);
2045 if (ShrinkTy)
2046 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2047 ShrinkTy = VectorType::get(ShrinkTy, VecTy);
2048
2049 return ShrinkTy;
2050}
2051
2052// Determine if this is a vector of ConstantFPs and if so, return the minimal
2053// type we can safely truncate all elements to.
2054static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
2055 auto *CV = dyn_cast<Constant>(V);
2056 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
2057 if (!CV || !CVVTy)
2058 return nullptr;
2059
2060 Type *MinType = nullptr;
2061
2062 unsigned NumElts = CVVTy->getNumElements();
2063
2064 // For fixed-width vectors we find the minimal type by looking
2065 // through the constant values of the vector.
2066 for (unsigned I = 0; I != NumElts; ++I) {
2067 if (match(CV->getAggregateElement(I), m_Poison()))
2068 continue;
2069
2070 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(I));
2071 if (!CFP)
2072 return nullptr;
2073
2074 Type *T = shrinkFPConstant(CFP, PreferBFloat);
2075 if (!T)
2076 return nullptr;
2077
2078 // If we haven't found a type yet or this type has a larger mantissa than
2079 // our previous type, this is our new minimal type.
2080 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
2081 MinType = T;
2082 }
2083
2084 // Make a vector type from the minimal type.
2085 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
2086}
2087
2088/// Find the minimum FP type we can safely truncate to.
2089static Type *getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC) {
2090 if (auto *FPExt = dyn_cast<FPExtInst>(V))
2091 return FPExt->getOperand(0)->getType();
2092
2093 Value *Src;
2094 if (match(V, m_IToFP(m_Value(Src))) &&
2095 IC.canBeCastedExactlyIntToFP(Src, PreferredTy, isa<SIToFPInst>(V),
2097 return PreferredTy;
2098
2099 bool PreferBFloat = PreferredTy->getScalarType()->isBFloatTy();
2100 // If this value is a constant, return the constant in the smallest FP type
2101 // that can accurately represent it. This allows us to turn
2102 // (float)((double)X+2.0) into x+2.0f.
2103 if (auto *CFP = dyn_cast<ConstantFP>(V))
2104 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
2105 return T;
2106
2107 // Try to shrink scalable and fixed splat vectors.
2108 if (auto *FPC = dyn_cast<Constant>(V))
2109 if (auto *VTy = dyn_cast<VectorType>(V->getType()))
2110 if (auto *Splat = dyn_cast_or_null<ConstantFP>(FPC->getSplatValue()))
2111 if (Type *T = shrinkFPConstant(Splat, PreferBFloat))
2112 return VectorType::get(T, VTy);
2113
2114 // Try to shrink a vector of FP constants. This returns nullptr on scalable
2115 // vectors
2116 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
2117 return T;
2118
2119 return V->getType();
2120}
2121
2123 bool IsSigned,
2124 const Instruction *CxtI) const {
2125 Type *SrcTy = V->getType();
2126 assert(SrcTy->isIntOrIntVectorTy() && "Expected an integer type");
2127 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
2128 int DestNumSigBits = FPTy->getFPMantissaWidth();
2129
2130 // Easy case - if the source integer type has less bits than the FP mantissa,
2131 // then the cast must be exact.
2132 if (SrcSize <= DestNumSigBits)
2133 return true;
2134
2135 // Cast from FP to integer and back to FP is independent of the intermediate
2136 // integer width because of poison on overflow.
2137 Value *F;
2138 if (match(V, m_FPToI(m_Value(F)))) {
2139 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
2140 // potential rounding of negative FP input values.
2141 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
2142 if (!IsSigned && match(V, m_FPToSI(m_Value())))
2143 SrcNumSigBits++;
2144
2145 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
2146 // significant bits than the destination (and make sure neither type is
2147 // weird -- ppc_fp128).
2148 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
2149 SrcNumSigBits <= DestNumSigBits)
2150 return true;
2151 }
2152
2153 // Try harder to find if the source integer type has less significant bits.
2154 // Compute number of sign bits or determine trailing zeros.
2155 KnownBits SrcKnown = computeKnownBits(V, CxtI);
2156 int SigBits = (int)SrcTy->getScalarSizeInBits() -
2157 SrcKnown.countMinLeadingZeros() -
2158 SrcKnown.countMinTrailingZeros();
2159 if (SigBits <= DestNumSigBits)
2160 return true;
2161
2162 // For sitofp, the sign maps to the FP sign bit, so only magnitude bits
2163 // (BitWidth - NumSignBits) consume mantissa.
2164 if (IsSigned) {
2165 SigBits = (int)SrcTy->getScalarSizeInBits() - ComputeNumSignBits(V, CxtI);
2166 if (SigBits <= DestNumSigBits)
2167 return true;
2168 }
2169
2170 return false;
2171}
2172
2174 CastInst::CastOps Opcode = I.getOpcode();
2175 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
2176 "Unexpected cast");
2177 Value *Src = I.getOperand(0);
2178 Type *FPTy = I.getType();
2179 return canBeCastedExactlyIntToFP(Src, FPTy, Opcode == CastInst::SIToFP, &I);
2180}
2181
2184 return I;
2185
2186 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
2187 // simplify this expression to avoid one or more of the trunc/extend
2188 // operations if we can do so without changing the numerical results.
2189 //
2190 // The exact manner in which the widths of the operands interact to limit
2191 // what we can and cannot do safely varies from operation to operation, and
2192 // is explained below in the various case statements.
2193 Type *Ty = FPT.getType();
2194 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
2195 if (BO && BO->hasOneUse()) {
2196 Type *LHSMinType = getMinimumFPType(BO->getOperand(0), Ty, *this);
2197 Type *RHSMinType = getMinimumFPType(BO->getOperand(1), Ty, *this);
2198 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
2199 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
2200 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
2201 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
2202 unsigned DstWidth = Ty->getFPMantissaWidth();
2203 switch (BO->getOpcode()) {
2204 default: break;
2205 case Instruction::FAdd:
2206 case Instruction::FSub:
2207 // For addition and subtraction, the infinitely precise result can
2208 // essentially be arbitrarily wide; proving that double rounding
2209 // will not occur because the result of OpI is exact (as we will for
2210 // FMul, for example) is hopeless. However, we *can* nonetheless
2211 // frequently know that double rounding cannot occur (or that it is
2212 // innocuous) by taking advantage of the specific structure of
2213 // infinitely-precise results that admit double rounding.
2214 //
2215 // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
2216 // to represent both sources, we can guarantee that the double
2217 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
2218 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
2219 // for proof of this fact).
2220 //
2221 // Note: Figueroa does not consider the case where DstFormat !=
2222 // SrcFormat. It's possible (likely even!) that this analysis
2223 // could be tightened for those cases, but they are rare (the main
2224 // case of interest here is (float)((double)float + float)).
2225 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
2226 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2227 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2228 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
2229 RI->copyFastMathFlags(BO);
2230 return RI;
2231 }
2232 break;
2233 case Instruction::FMul:
2234 // For multiplication, the infinitely precise result has at most
2235 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
2236 // that such a value can be exactly represented, then no double
2237 // rounding can possibly occur; we can safely perform the operation
2238 // in the destination format if it can represent both sources.
2239 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
2240 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2241 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2242 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
2243 }
2244 break;
2245 case Instruction::FDiv:
2246 // For division, we use again use the bound from Figueroa's
2247 // dissertation. I am entirely certain that this bound can be
2248 // tightened in the unbalanced operand case by an analysis based on
2249 // the diophantine rational approximation bound, but the well-known
2250 // condition used here is a good conservative first pass.
2251 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
2252 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
2253 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2254 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2255 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
2256 }
2257 break;
2258 case Instruction::FRem: {
2259 // Remainder is straightforward. Remainder is always exact, so the
2260 // type of OpI doesn't enter into things at all. We simply evaluate
2261 // in whichever source type is larger, then convert to the
2262 // destination type.
2263 if (SrcWidth == OpWidth)
2264 break;
2265 Value *LHS, *RHS;
2266 if (LHSWidth == SrcWidth) {
2267 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
2268 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
2269 } else {
2270 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
2271 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
2272 }
2273
2274 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
2275 return CastInst::CreateFPCast(ExactResult, Ty);
2276 }
2277 }
2278 }
2279
2280 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
2281 Value *X;
2283 if (Op && Op->hasOneUse()) {
2284 FastMathFlags FMF = FPT.getFastMathFlags();
2285 if (auto *FPMO = dyn_cast<FPMathOperator>(Op))
2286 FMF &= FPMO->getFastMathFlags();
2287
2288 if (match(Op, m_FNeg(m_Value(X)))) {
2289 Value *InnerTrunc = Builder.CreateFPTruncFMF(X, Ty, FMF);
2290 Value *Neg = Builder.CreateFNegFMF(InnerTrunc, FMF);
2291 return replaceInstUsesWith(FPT, Neg);
2292 }
2293
2294 // If we are truncating a select that has an extended operand, we can
2295 // narrow the other operand and do the select as a narrow op.
2296 Value *Cond, *X, *Y;
2298 X->getType() == Ty) {
2299 // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
2300 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2301 Value *Sel =
2302 Builder.CreateSelectFMF(Cond, X, NarrowY, FMF, "narrow.sel", Op);
2303 return replaceInstUsesWith(FPT, Sel);
2304 }
2306 X->getType() == Ty) {
2307 // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
2308 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2309 Value *Sel =
2310 Builder.CreateSelectFMF(Cond, NarrowY, X, FMF, "narrow.sel", Op);
2311 return replaceInstUsesWith(FPT, Sel);
2312 }
2313 }
2314
2315 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
2316 switch (II->getIntrinsicID()) {
2317 default: break;
2318 case Intrinsic::ceil:
2319 case Intrinsic::fabs:
2320 case Intrinsic::floor:
2321 case Intrinsic::nearbyint:
2322 case Intrinsic::rint:
2323 case Intrinsic::round:
2324 case Intrinsic::roundeven:
2325 case Intrinsic::trunc: {
2326 Value *Src = II->getArgOperand(0);
2327 if (!Src->hasOneUse())
2328 break;
2329
2330 // Except for fabs, this transformation requires the input of the unary FP
2331 // operation to be itself an fpext from the type to which we're
2332 // truncating.
2333 if (II->getIntrinsicID() != Intrinsic::fabs) {
2334 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
2335 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
2336 break;
2337 }
2338
2339 // Do unary FP operation on smaller type.
2340 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
2341 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
2343 FPT.getModule(), II->getIntrinsicID(), Ty);
2345 II->getOperandBundlesAsDefs(OpBundles);
2346 CallInst *NewCI =
2347 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
2348 // A normal value may be converted to an infinity. It means that we cannot
2349 // propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
2350 NewCI->copyFastMathFlags(&FPT);
2351 return NewCI;
2352 }
2353 }
2354 }
2355
2356 if (Instruction *I = shrinkInsertElt(FPT, Builder))
2357 return I;
2358
2359 Value *Src = FPT.getOperand(0);
2360 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2361 auto *FPCast = cast<CastInst>(Src);
2362 if (isKnownExactCastIntToFP(*FPCast))
2363 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2364 }
2365
2366 return nullptr;
2367}
2368
2370 // If the source operand is a cast from integer to FP and known exact, then
2371 // cast the integer operand directly to the destination type.
2372 Type *Ty = FPExt.getType();
2373 Value *Src = FPExt.getOperand(0);
2374 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2375 auto *FPCast = cast<CastInst>(Src);
2376 if (isKnownExactCastIntToFP(*FPCast))
2377 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2378 }
2379
2380 return commonCastTransforms(FPExt);
2381}
2382
2383/// fpto{s/u}i[.sat]({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
2384/// This is safe if the intermediate type has enough bits in its mantissa to
2385/// accurately represent all values of X. For example, this won't work with
2386/// i64 -> float -> i64.
2387template <typename FPToIntTy>
2389 constexpr bool IsSaturating = std::is_same_v<FPToIntTy, IntrinsicInst>;
2390
2391 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
2392 return nullptr;
2393
2394 auto *OpI = cast<CastInst>(FI.getOperand(0));
2395 Value *X = OpI->getOperand(0);
2396 Type *XType = X->getType();
2397 Type *DestType = FI.getType();
2398 bool IsInputSigned = isa<SIToFPInst>(OpI);
2399
2400 bool IsOutputSigned;
2401 if constexpr (IsSaturating)
2402 IsOutputSigned = FI.getIntrinsicID() == Intrinsic::fptosi_sat;
2403 else
2404 IsOutputSigned = isa<FPToSIInst>(FI);
2405
2406 // Since we can assume the conversion won't overflow, our decision as to
2407 // whether the input will fit in the float should depend on the minimum
2408 // of the input range and output range.
2409
2410 // This means this is also safe for a signed input and unsigned output, since
2411 // a negative input would lead to undefined behavior.
2412 if (!isKnownExactCastIntToFP(*OpI)) {
2413 if constexpr (!IsSaturating) {
2414 // The first cast may not round exactly based on the source integer width
2415 // and FP width, but the overflow UB rules can still allow this to fold.
2416 // If the destination type is narrow, that means the intermediate FP value
2417 // must be large enough to hold the source value exactly.
2418 //
2419 // For example, (uint8_t)((float)(uint32_t 16777217) is UB.
2420 int OutputSize = (int)DestType->getScalarSizeInBits();
2421 if (OutputSize > OpI->getType()->getFPMantissaWidth())
2422 return nullptr;
2423 } else {
2424 // Sat intrinsics produce a defined saturated value on overflow, so
2425 // the UB-based shortcut is invalid. Require exactness.
2426 return nullptr;
2427 }
2428 }
2429
2430 unsigned SrcWidth = XType->getScalarSizeInBits();
2431 unsigned DestWidth = DestType->getScalarSizeInBits();
2432
2433 if constexpr (IsSaturating) {
2434 // TODO: cross-sign and narrowing cases could be handled with range
2435 // analysis to prove the source fits in the destination.
2436 if (IsInputSigned != IsOutputSigned || DestWidth < SrcWidth)
2437 return nullptr;
2438 }
2439
2440 if (DestWidth > SrcWidth) {
2441 if (IsInputSigned && IsOutputSigned)
2442 return new SExtInst(X, DestType);
2443 return new ZExtInst(X, DestType);
2444 }
2445 if (DestWidth < SrcWidth)
2446 return new TruncInst(X, DestType);
2447
2448 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2449 return replaceInstUsesWith(FI, X);
2450}
2451
2453template Instruction *
2455
2457 // fpto{u/s}i non-norm --> 0
2458 FPClassTest Mask =
2459 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2461 FI.getOperand(0), Mask, IC.getSimplifyQuery().getWithInstruction(&FI));
2462 if (FPClass.isKnownNever(Mask))
2464
2465 return nullptr;
2466}
2467
2469 if (Instruction *I = foldItoFPtoI(FI))
2470 return I;
2471
2472 if (Instruction *I = foldFPtoI(FI, *this))
2473 return I;
2474
2475 return commonCastTransforms(FI);
2476}
2477
2479 if (Instruction *I = foldItoFPtoI(FI))
2480 return I;
2481
2482 if (Instruction *I = foldFPtoI(FI, *this))
2483 return I;
2484
2485 return commonCastTransforms(FI);
2486}
2487
2489 if (Instruction *R = commonCastTransforms(CI))
2490 return R;
2491 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
2492 CI.setNonNeg();
2493 return &CI;
2494 }
2495 return nullptr;
2496}
2497
2499 if (Instruction *R = commonCastTransforms(CI))
2500 return R;
2501 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2502 auto *UI =
2503 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2504 UI->setNonNeg(true);
2505 return UI;
2506 }
2507 return nullptr;
2508}
2509
2511 // If the source integer type is not the intptr_t type for this target, do a
2512 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2513 // cast to be exposed to other transforms.
2514 unsigned AS = CI.getAddressSpace();
2515 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2516 DL.getPointerSizeInBits(AS)) {
2517 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2518 DL.getIntPtrType(CI.getContext(), AS));
2519 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2520 return new IntToPtrInst(P, CI.getType());
2521 }
2522
2523 // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
2524 // (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
2525 // value.
2526 Value *Base;
2527 Value *Offset;
2528 auto UsesPointerAsInt = [](User *U) {
2530 return true;
2531 if (auto *P = dyn_cast<PHINode>(U))
2532 return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(*P->user_begin());
2533 return false;
2534 };
2535 if (match(CI.getOperand(0),
2537 m_Value(Offset)))) &&
2539 Base->getType()->getPointerAddressSpace() &&
2540 all_of(CI.users(), UsesPointerAsInt)) {
2541 return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
2542 }
2543
2545 return I;
2546
2547 return nullptr;
2548}
2549
2551 // Look through chain of one-use GEPs.
2552 Type *PtrTy = Ptr->getType();
2554 while (true) {
2555 auto *GEP = dyn_cast<GEPOperator>(Ptr);
2556 if (!GEP || !GEP->hasOneUse())
2557 break;
2558 GEPs.push_back(GEP);
2559 Ptr = GEP->getPointerOperand();
2560 }
2561
2562 // Don't handle case where GEP converts from pointer to vector.
2563 if (GEPs.empty() || PtrTy != Ptr->getType())
2564 return nullptr;
2565
2566 // Check whether we know the integer value of the base pointer.
2567 Value *Res;
2568 Type *IdxTy = DL.getIndexType(PtrTy);
2569 if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
2570 Res->getType() == IntTy && IntTy == IdxTy) {
2571 // pass
2572 } else if (isa<ConstantPointerNull>(Ptr)) {
2573 Res = Constant::getNullValue(IdxTy);
2574 } else {
2575 return nullptr;
2576 }
2577
2578 // Perform the entire operation on integers instead.
2579 for (GEPOperator *GEP : reverse(GEPs)) {
2580 Value *Offset = EmitGEPOffset(GEP);
2581 Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
2582 }
2583 return Builder.CreateZExtOrTrunc(Res, IntTy);
2584}
2585
2587 // If the destination integer type is not the intptr_t type for this target,
2588 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2589 // to be exposed to other transforms.
2591 Type *SrcTy = SrcOp->getType();
2592 Type *Ty = CI.getType();
2593 unsigned AS = CI.getPointerAddressSpace();
2594 unsigned TySize = Ty->getScalarSizeInBits();
2595 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2596 if (TySize != PtrSize) {
2597 Type *IntPtrTy =
2598 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2599 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2600 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2601 }
2602
2603 // (ptrtoint (ptrmask P, M))
2604 // -> (and (ptrtoint P), M)
2605 // This is generally beneficial as `and` is better supported than `ptrmask`.
2606 Value *Ptr, *Mask;
2608 m_Value(Mask)))) &&
2609 Mask->getType() == Ty)
2610 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2611
2612 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2613 return replaceInstUsesWith(CI, V);
2614
2615 Value *Vec, *Scalar, *Index;
2617 m_Value(Scalar), m_Value(Index)))) &&
2618 Vec->getType() == Ty) {
2619 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2620 // Convert the scalar to int followed by insert to eliminate one cast:
2621 // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2622 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2623 return InsertElementInst::Create(Vec, NewCast, Index);
2624 }
2625
2626 return commonCastTransforms(CI);
2627}
2628
2631 Type *Ty = CI.getType();
2632
2633 // (ptrtoaddr (ptrmask P, M))
2634 // -> (and (ptrtoaddr P), M)
2635 // This is generally beneficial as `and` is better supported than `ptrmask`.
2636 Value *Ptr, *Mask;
2638 m_Value(Mask)))) &&
2639 Mask->getType() == Ty)
2640 return BinaryOperator::CreateAnd(Builder.CreatePtrToAddr(Ptr), Mask);
2641
2642 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2643 return replaceInstUsesWith(CI, V);
2644
2645 // FIXME: Implement variants of ptrtoint folds.
2646 return commonCastTransforms(CI);
2647}
2648
2649/// This input value (which is known to have vector type) is being zero extended
2650/// or truncated to the specified vector type. Since the zext/trunc is done
2651/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2652/// endianness will impact which end of the vector that is extended or
2653/// truncated.
2654///
2655/// A vector is always stored with index 0 at the lowest address, which
2656/// corresponds to the most significant bits for a big endian stored integer and
2657/// the least significant bits for little endian. A trunc/zext of an integer
2658/// impacts the big end of the integer. Thus, we need to add/remove elements at
2659/// the front of the vector for big endian targets, and the back of the vector
2660/// for little endian targets.
2661///
2662/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2663///
2664/// The source and destination vector types may have different element types.
2665static Instruction *
2667 InstCombinerImpl &IC) {
2668 // We can only do this optimization if the output is a multiple of the input
2669 // element size, or the input is a multiple of the output element size.
2670 // Convert the input type to have the same element type as the output.
2671 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2672
2673 if (SrcTy->getElementType() != DestTy->getElementType()) {
2674 // The input types don't need to be identical, but for now they must be the
2675 // same size. There is no specific reason we couldn't handle things like
2676 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2677 // there yet.
2678 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2679 DestTy->getElementType()->getPrimitiveSizeInBits())
2680 return nullptr;
2681
2682 SrcTy =
2683 FixedVectorType::get(DestTy->getElementType(),
2684 cast<FixedVectorType>(SrcTy)->getNumElements());
2685 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2686 }
2687
2688 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2689 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2690 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2691
2692 assert(SrcElts != DestElts && "Element counts should be different.");
2693
2694 // Now that the element types match, get the shuffle mask and RHS of the
2695 // shuffle to use, which depends on whether we're increasing or decreasing the
2696 // size of the input.
2697 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2698 ArrayRef<int> ShuffleMask;
2699 Value *V2;
2700
2701 if (SrcElts > DestElts) {
2702 // If we're shrinking the number of elements (rewriting an integer
2703 // truncate), just shuffle in the elements corresponding to the least
2704 // significant bits from the input and use poison as the second shuffle
2705 // input.
2706 V2 = PoisonValue::get(SrcTy);
2707 // Make sure the shuffle mask selects the "least significant bits" by
2708 // keeping elements from back of the src vector for big endian, and from the
2709 // front for little endian.
2710 ShuffleMask = ShuffleMaskStorage;
2711 if (IsBigEndian)
2712 ShuffleMask = ShuffleMask.take_back(DestElts);
2713 else
2714 ShuffleMask = ShuffleMask.take_front(DestElts);
2715 } else {
2716 // If we're increasing the number of elements (rewriting an integer zext),
2717 // shuffle in all of the elements from InVal. Fill the rest of the result
2718 // elements with zeros from a constant zero.
2719 V2 = Constant::getNullValue(SrcTy);
2720 // Use first elt from V2 when indicating zero in the shuffle mask.
2721 uint32_t NullElt = SrcElts;
2722 // Extend with null values in the "most significant bits" by adding elements
2723 // in front of the src vector for big endian, and at the back for little
2724 // endian.
2725 unsigned DeltaElts = DestElts - SrcElts;
2726 if (IsBigEndian)
2727 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2728 else
2729 ShuffleMaskStorage.append(DeltaElts, NullElt);
2730 ShuffleMask = ShuffleMaskStorage;
2731 }
2732
2733 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2734}
2735
2736static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2737 return Value % Ty->getPrimitiveSizeInBits() == 0;
2738}
2739
2740static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2741 return Value / Ty->getPrimitiveSizeInBits();
2742}
2743
2744/// V is a value which is inserted into a vector of VecEltTy.
2745/// Look through the value to see if we can decompose it into
2746/// insertions into the vector. See the example in the comment for
2747/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2748/// The type of V is always a non-zero multiple of VecEltTy's size.
2749/// Shift is the number of bits between the lsb of V and the lsb of
2750/// the vector.
2751///
2752/// This returns false if the pattern can't be matched or true if it can,
2753/// filling in Elements with the elements found here.
2754static bool collectInsertionElements(Value *V, unsigned Shift,
2755 SmallVectorImpl<Value *> &Elements,
2756 Type *VecEltTy, bool isBigEndian) {
2757 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2758 "Shift should be a multiple of the element type size");
2759
2760 // Poison values never contribute useful bits to the result.
2761 if (match(V, m_Poison()))
2762 return true;
2763
2764 // If we got down to a value of the right type, we win, try inserting into the
2765 // right element.
2766 if (V->getType() == VecEltTy) {
2767 // Inserting null doesn't actually insert any elements.
2768 if (Constant *C = dyn_cast<Constant>(V))
2769 if (C->isNullValue())
2770 return true;
2771
2772 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2773 if (isBigEndian)
2774 ElementIndex = Elements.size() - ElementIndex - 1;
2775
2776 // Fail if multiple elements are inserted into this slot.
2777 if (Elements[ElementIndex])
2778 return false;
2779
2780 Elements[ElementIndex] = V;
2781 return true;
2782 }
2783
2784 if (Constant *C = dyn_cast<Constant>(V)) {
2785 // Figure out the # elements this provides, and bitcast it or slice it up
2786 // as required.
2787 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2788 VecEltTy);
2789 // If the constant is the size of a vector element, we just need to bitcast
2790 // it to the right type so it gets properly inserted.
2791 if (NumElts == 1)
2793 Shift, Elements, VecEltTy, isBigEndian);
2794
2795 // Okay, this is a constant that covers multiple elements. Slice it up into
2796 // pieces and insert each element-sized piece into the vector.
2797 if (!isa<IntegerType>(C->getType()))
2798 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2799 C->getType()->getPrimitiveSizeInBits()));
2800 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2801 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2802
2803 for (unsigned i = 0; i != NumElts; ++i) {
2804 unsigned ShiftI = i * ElementSize;
2806 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2807 if (!Piece)
2808 return false;
2809
2810 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2811 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2812 isBigEndian))
2813 return false;
2814 }
2815 return true;
2816 }
2817
2818 if (!V->hasOneUse()) return false;
2819
2821 if (!I) return false;
2822 switch (I->getOpcode()) {
2823 default: return false; // Unhandled case.
2824 case Instruction::BitCast:
2825 if (I->getOperand(0)->getType()->isVectorTy())
2826 return false;
2827 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2828 isBigEndian);
2829 case Instruction::ZExt:
2831 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2832 VecEltTy))
2833 return false;
2834 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2835 isBigEndian);
2836 case Instruction::Or:
2837 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2838 isBigEndian) &&
2839 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2840 isBigEndian);
2841 case Instruction::Shl: {
2842 // Must be shifting by a constant that is a multiple of the element size.
2843 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2844 if (!CI) return false;
2845 Shift += CI->getZExtValue();
2846 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2847 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2848 isBigEndian);
2849 }
2850
2851 }
2852}
2853
2854
2855/// If the input is an 'or' instruction, we may be doing shifts and ors to
2856/// assemble the elements of the vector manually.
2857/// Try to rip the code out and replace it with insertelements. This is to
2858/// optimize code like this:
2859///
2860/// %tmp37 = bitcast float %inc to i32
2861/// %tmp38 = zext i32 %tmp37 to i64
2862/// %tmp31 = bitcast float %inc5 to i32
2863/// %tmp32 = zext i32 %tmp31 to i64
2864/// %tmp33 = shl i64 %tmp32, 32
2865/// %ins35 = or i64 %tmp33, %tmp38
2866/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2867///
2868/// Into two insertelements that do "buildvector{%inc, %inc5}".
2870 InstCombinerImpl &IC) {
2871 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2872 Value *IntInput = CI.getOperand(0);
2873
2874 // if the int input is just an undef value do not try to optimize to vector
2875 // insertions as it will prevent undef propagation
2876 if (isa<UndefValue>(IntInput))
2877 return nullptr;
2878
2879 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2880 if (!collectInsertionElements(IntInput, 0, Elements,
2881 DestVecTy->getElementType(),
2882 IC.getDataLayout().isBigEndian()))
2883 return nullptr;
2884
2885 // If we succeeded, we know that all of the element are specified by Elements
2886 // or are zero if Elements has a null entry. Recast this as a set of
2887 // insertions.
2888 Value *Result = Constant::getNullValue(CI.getType());
2889 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2890 if (!Elements[i]) continue; // Unset element.
2891
2892 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2893 IC.Builder.getInt32(i));
2894 }
2895
2896 return Result;
2897}
2898
2899/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2900/// vector followed by extract element. The backend tends to handle bitcasts of
2901/// vectors better than bitcasts of scalars because vector registers are
2902/// usually not type-specific like scalar integer or scalar floating-point.
2904 InstCombinerImpl &IC) {
2905 Value *VecOp, *Index;
2906 if (!match(BitCast.getOperand(0),
2907 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2908 return nullptr;
2909
2910 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2911 // type to extract from.
2912 Type *DestType = BitCast.getType();
2913 VectorType *VecType = cast<VectorType>(VecOp->getType());
2914 if (VectorType::isValidElementType(DestType)) {
2915 auto *NewVecType = VectorType::get(DestType, VecType);
2916 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2917 return ExtractElementInst::Create(NewBC, Index);
2918 }
2919
2920 // Only solve DestType is vector to avoid inverse transform in visitBitCast.
2921 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2922 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2923 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2924 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2925
2926 return nullptr;
2927}
2928
2929/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2931 InstCombiner::BuilderTy &Builder) {
2932 Type *DestTy = BitCast.getType();
2933 BinaryOperator *BO;
2934
2935 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2936 !BO->isBitwiseLogicOp())
2937 return nullptr;
2938
2939 // FIXME: This transform is restricted to vector types to avoid backend
2940 // problems caused by creating potentially illegal operations. If a fix-up is
2941 // added to handle that situation, we can remove this check.
2942 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2943 return nullptr;
2944
2945 if (DestTy->isFPOrFPVectorTy()) {
2946 Value *X, *Y;
2947 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2948 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2950 if (X->getType()->isFPOrFPVectorTy() &&
2951 Y->getType()->isIntOrIntVectorTy()) {
2952 Value *CastedOp =
2953 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2954 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2955 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2956 }
2957 if (X->getType()->isIntOrIntVectorTy() &&
2958 Y->getType()->isFPOrFPVectorTy()) {
2959 Value *CastedOp =
2960 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2961 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2962 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2963 }
2964 }
2965 return nullptr;
2966 }
2967
2968 if (!DestTy->isIntOrIntVectorTy())
2969 return nullptr;
2970
2971 Value *X;
2972 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2973 X->getType() == DestTy && !isa<Constant>(X)) {
2974 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2975 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2976 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2977 }
2978
2979 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2980 X->getType() == DestTy && !isa<Constant>(X)) {
2981 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2982 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2983 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2984 }
2985
2986 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2987 // constant. This eases recognition of special constants for later ops.
2988 // Example:
2989 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2990 Constant *C;
2991 if (match(BO->getOperand(1), m_Constant(C))) {
2992 // bitcast (logic X, C) --> logic (bitcast X, C')
2993 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2994 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2995 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2996 }
2997
2998 return nullptr;
2999}
3000
3001/// Change the type of a select if we can eliminate a bitcast.
3003 InstCombiner::BuilderTy &Builder) {
3004 Value *Cond, *TVal, *FVal;
3005 if (!match(BitCast.getOperand(0),
3006 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
3007 return nullptr;
3008
3009 // A vector select must maintain the same number of elements in its operands.
3010 Type *CondTy = Cond->getType();
3011 Type *DestTy = BitCast.getType();
3012
3013 auto *DestVecTy = dyn_cast<VectorType>(DestTy);
3014
3015 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
3016 if (!DestVecTy ||
3017 CondVTy->getElementCount() != DestVecTy->getElementCount())
3018 return nullptr;
3019
3020 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
3021 auto *SrcVecTy = dyn_cast<VectorType>(TVal->getType());
3022
3023 if ((isa<Constant>(TVal) || isa<Constant>(FVal)) &&
3024 (!DestVecTy ||
3025 (SrcVecTy && ElementCount::isKnownLE(DestVecTy->getElementCount(),
3026 SrcVecTy->getElementCount())))) {
3027 // Avoid introducing select of vector (or select of vector with more
3028 // elements) until the backend can undo this transformation.
3029 Value *CastedTVal = Builder.CreateBitCast(TVal, DestTy);
3030 Value *CastedFVal = Builder.CreateBitCast(FVal, DestTy);
3031 return SelectInst::Create(Cond, CastedTVal, CastedFVal, "", nullptr, Sel);
3032 }
3033
3034 // FIXME: This transform is restricted from changing the select between
3035 // scalars and vectors to avoid backend problems caused by creating
3036 // potentially illegal operations. If a fix-up is added to handle that
3037 // situation, we can remove this check.
3038 if ((DestVecTy != nullptr) != (SrcVecTy != nullptr))
3039 return nullptr;
3040
3041 Value *X;
3042 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
3043 !isa<Constant>(X)) {
3044 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
3045 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
3046 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
3047 }
3048
3049 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
3050 !isa<Constant>(X)) {
3051 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
3052 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
3053 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
3054 }
3055
3056 return nullptr;
3057}
3058
3059/// Check if all users of CI are StoreInsts.
3060static bool hasStoreUsersOnly(CastInst &CI) {
3061 for (User *U : CI.users()) {
3062 if (!isa<StoreInst>(U))
3063 return false;
3064 }
3065 return true;
3066}
3067
3068/// This function handles following case
3069///
3070/// A -> B cast
3071/// PHI
3072/// B -> A cast
3073///
3074/// All the related PHI nodes can be replaced by new PHI nodes with type A.
3075/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
3076Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
3077 PHINode *PN) {
3078 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
3079 if (hasStoreUsersOnly(CI))
3080 return nullptr;
3081
3082 Value *Src = CI.getOperand(0);
3083 Type *SrcTy = Src->getType(); // Type B
3084 Type *DestTy = CI.getType(); // Type A
3085
3086 SmallVector<PHINode *, 4> PhiWorklist;
3087 SmallSetVector<PHINode *, 4> OldPhiNodes;
3088
3089 // Find all of the A->B casts and PHI nodes.
3090 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
3091 // OldPhiNodes is used to track all known PHI nodes, before adding a new
3092 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
3093 PhiWorklist.push_back(PN);
3094 OldPhiNodes.insert(PN);
3095 while (!PhiWorklist.empty()) {
3096 auto *OldPN = PhiWorklist.pop_back_val();
3097 for (Value *IncValue : OldPN->incoming_values()) {
3098 if (isa<Constant>(IncValue))
3099 continue;
3100
3101 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
3102 // If there is a sequence of one or more load instructions, each loaded
3103 // value is used as address of later load instruction, bitcast is
3104 // necessary to change the value type, don't optimize it. For
3105 // simplicity we give up if the load address comes from another load.
3106 Value *Addr = LI->getOperand(0);
3107 if (Addr == &CI || isa<LoadInst>(Addr))
3108 return nullptr;
3109 // Don't tranform "load <256 x i32>, <256 x i32>*" to
3110 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
3111 // TODO: Remove this check when bitcast between vector and x86_amx
3112 // is replaced with a specific intrinsic.
3113 if (DestTy->isX86_AMXTy())
3114 return nullptr;
3115 if (LI->hasOneUse() && LI->isSimple())
3116 continue;
3117 // If a LoadInst has more than one use, changing the type of loaded
3118 // value may create another bitcast.
3119 return nullptr;
3120 }
3121
3122 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
3123 if (OldPhiNodes.insert(PNode))
3124 PhiWorklist.push_back(PNode);
3125 continue;
3126 }
3127
3128 auto *BCI = dyn_cast<BitCastInst>(IncValue);
3129 // We can't handle other instructions.
3130 if (!BCI)
3131 return nullptr;
3132
3133 // Verify it's a A->B cast.
3134 Type *TyA = BCI->getOperand(0)->getType();
3135 Type *TyB = BCI->getType();
3136 if (TyA != DestTy || TyB != SrcTy)
3137 return nullptr;
3138 }
3139 }
3140
3141 // Check that each user of each old PHI node is something that we can
3142 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
3143 for (auto *OldPN : OldPhiNodes) {
3144 for (User *V : OldPN->users()) {
3145 if (auto *SI = dyn_cast<StoreInst>(V)) {
3146 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
3147 return nullptr;
3148 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3149 // Verify it's a B->A cast.
3150 Type *TyB = BCI->getOperand(0)->getType();
3151 Type *TyA = BCI->getType();
3152 if (TyA != DestTy || TyB != SrcTy)
3153 return nullptr;
3154 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3155 // As long as the user is another old PHI node, then even if we don't
3156 // rewrite it, the PHI web we're considering won't have any users
3157 // outside itself, so it'll be dead.
3158 if (!OldPhiNodes.contains(PHI))
3159 return nullptr;
3160 } else {
3161 return nullptr;
3162 }
3163 }
3164 }
3165
3166 // For each old PHI node, create a corresponding new PHI node with a type A.
3167 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
3168 for (auto *OldPN : OldPhiNodes) {
3169 Builder.SetInsertPoint(OldPN);
3170 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
3171 NewPNodes[OldPN] = NewPN;
3172 }
3173
3174 // Fill in the operands of new PHI nodes.
3175 for (auto *OldPN : OldPhiNodes) {
3176 PHINode *NewPN = NewPNodes[OldPN];
3177 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
3178 Value *V = OldPN->getOperand(j);
3179 Value *NewV = nullptr;
3180 if (auto *C = dyn_cast<Constant>(V)) {
3181 NewV = ConstantExpr::getBitCast(C, DestTy);
3182 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
3183 // Explicitly perform load combine to make sure no opposing transform
3184 // can remove the bitcast in the meantime and trigger an infinite loop.
3185 Builder.SetInsertPoint(LI);
3186 NewV = combineLoadToNewType(*LI, DestTy);
3187 // Remove the old load and its use in the old phi, which itself becomes
3188 // dead once the whole transform finishes.
3189 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
3191 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3192 NewV = BCI->getOperand(0);
3193 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
3194 NewV = NewPNodes[PrevPN];
3195 }
3196 assert(NewV);
3197 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
3198 }
3199 }
3200
3201 // Traverse all accumulated PHI nodes and process its users,
3202 // which are Stores and BitcCasts. Without this processing
3203 // NewPHI nodes could be replicated and could lead to extra
3204 // moves generated after DeSSA.
3205 // If there is a store with type B, change it to type A.
3206
3207
3208 // Replace users of BitCast B->A with NewPHI. These will help
3209 // later to get rid off a closure formed by OldPHI nodes.
3210 Instruction *RetVal = nullptr;
3211 for (auto *OldPN : OldPhiNodes) {
3212 PHINode *NewPN = NewPNodes[OldPN];
3213 for (User *V : make_early_inc_range(OldPN->users())) {
3214 if (auto *SI = dyn_cast<StoreInst>(V)) {
3215 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
3216 Builder.SetInsertPoint(SI);
3217 auto *NewBC =
3218 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
3219 SI->setOperand(0, NewBC);
3220 Worklist.push(SI);
3221 assert(hasStoreUsersOnly(*NewBC));
3222 }
3223 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3224 Type *TyB = BCI->getOperand(0)->getType();
3225 Type *TyA = BCI->getType();
3226 assert(TyA == DestTy && TyB == SrcTy);
3227 (void) TyA;
3228 (void) TyB;
3229 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
3230 if (BCI == &CI)
3231 RetVal = I;
3232 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3233 assert(OldPhiNodes.contains(PHI));
3234 (void) PHI;
3235 } else {
3236 llvm_unreachable("all uses should be handled");
3237 }
3238 }
3239 }
3240
3241 return RetVal;
3242}
3243
3244/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
3245/// copysign((bitcast Y to fp), X)
3247 InstCombiner::BuilderTy &Builder,
3248 const SimplifyQuery &SQ) {
3249 Value *X, *Y;
3250 Type *FTy = CI.getType();
3251 if (!FTy->isFPOrFPVectorTy())
3252 return nullptr;
3255 m_Value(Y)))))
3256 return nullptr;
3257 if (X->getType() != FTy)
3258 return nullptr;
3259 if (!isKnownNonNegative(Y, SQ))
3260 return nullptr;
3261
3262 return Builder.CreateCopySign(Builder.CreateBitCast(Y, FTy), X);
3263}
3264
3266 // If the operands are integer typed then apply the integer transforms,
3267 // otherwise just apply the common ones.
3268 Value *Src = CI.getOperand(0);
3269 Type *SrcTy = Src->getType();
3270 Type *DestTy = CI.getType();
3271
3272 // Get rid of casts from one type to the same type. These are useless and can
3273 // be replaced by the operand.
3274 if (DestTy == Src->getType())
3275 return replaceInstUsesWith(CI, Src);
3276
3277 if (isa<FixedVectorType>(DestTy)) {
3278 if (isa<IntegerType>(SrcTy)) {
3279 // If this is a cast from an integer to vector, check to see if the input
3280 // is a trunc or zext of a bitcast from vector. If so, we can replace all
3281 // the casts with a shuffle and (potentially) a bitcast.
3282 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
3283 CastInst *SrcCast = cast<CastInst>(Src);
3284 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
3285 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
3287 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
3288 return I;
3289 }
3290
3291 // If the input is an 'or' instruction, we may be doing shifts and ors to
3292 // assemble the elements of the vector manually. Try to rip the code out
3293 // and replace it with insertelements.
3294 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
3295 return replaceInstUsesWith(CI, V);
3296 }
3297 }
3298
3299 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
3300 if (SrcVTy->getNumElements() == 1) {
3301 // If our destination is not a vector, then make this a straight
3302 // scalar-scalar cast.
3303 if (!DestTy->isVectorTy()) {
3304 Value *Elem =
3305 Builder.CreateExtractElement(Src,
3307 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
3308 }
3309
3310 // Otherwise, see if our source is an insert. If so, then use the scalar
3311 // component directly:
3312 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
3313 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
3314 return new BitCastInst(InsElt->getOperand(1), DestTy);
3315 }
3316
3317 // Convert an artificial vector insert into more analyzable bitwise logic.
3318 unsigned BitWidth = DestTy->getScalarSizeInBits();
3319 Value *X, *Y;
3320 uint64_t IndexC;
3322 m_Value(Y), m_ConstantInt(IndexC)))) &&
3323 DestTy->isIntegerTy() && X->getType() == DestTy &&
3324 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
3325 // Adjust for big endian - the LSBs are at the high index.
3326 if (DL.isBigEndian())
3327 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
3328
3329 // We only handle (endian-normalized) insert to index 0. Any other insert
3330 // would require a left-shift, so that is an extra instruction.
3331 if (IndexC == 0) {
3332 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
3333 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
3334 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
3335 Value *AndX = Builder.CreateAnd(X, MaskC);
3336 Value *ZextY = Builder.CreateZExt(Y, DestTy);
3337 return BinaryOperator::CreateOr(AndX, ZextY);
3338 }
3339 }
3340 }
3341
3342 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
3343 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
3344 // a bitcast to a vector with the same # elts.
3345 Value *ShufOp0 = Shuf->getOperand(0);
3346 Value *ShufOp1 = Shuf->getOperand(1);
3347 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
3348 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
3349 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
3350 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
3351 ShufElts == SrcVecElts) {
3352 BitCastInst *Tmp;
3353 // If either of the operands is a cast from CI.getType(), then
3354 // evaluating the shuffle in the casted destination's type will allow
3355 // us to eliminate at least one cast.
3356 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
3357 Tmp->getOperand(0)->getType() == DestTy) ||
3358 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
3359 Tmp->getOperand(0)->getType() == DestTy)) {
3360 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
3361 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
3362 // Return a new shuffle vector. Use the same element ID's, as we
3363 // know the vector types match #elts.
3364 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
3365 }
3366 }
3367
3368 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
3369 // as a byte/bit swap:
3370 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
3371 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
3372 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
3373 Shuf->hasOneUse() && Shuf->isReverse()) {
3374 unsigned IntrinsicNum = 0;
3375 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
3376 SrcTy->getScalarSizeInBits() == 8) {
3377 IntrinsicNum = Intrinsic::bswap;
3378 } else if (SrcTy->getScalarSizeInBits() == 1) {
3379 IntrinsicNum = Intrinsic::bitreverse;
3380 }
3381 if (IntrinsicNum != 0) {
3382 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
3383 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
3384 Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
3385 CI.getModule(), IntrinsicNum, DestTy);
3386 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
3387 return CallInst::Create(BswapOrBitreverse, {ScalarX});
3388 }
3389 }
3390 }
3391
3392 // Handle the A->B->A cast, and there is an intervening PHI node.
3393 if (PHINode *PN = dyn_cast<PHINode>(Src))
3394 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
3395 return I;
3396
3397 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
3398 return I;
3399
3401 return I;
3402
3404 return I;
3405
3406 if (Value *V = foldCopySignIdioms(CI, Builder, SQ.getWithInstruction(&CI)))
3407 return replaceInstUsesWith(CI, V);
3408
3409 return commonCastTransforms(CI);
3410}
3411
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This file defines the DenseMap class.
static bool isSigned(unsigned Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static Value * foldCopySignIdioms(BitCastInst &CI, InstCombiner::BuilderTy &Builder, const SimplifyQuery &SQ)
Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to copysign((bitcast Y to fp),...
static Type * shrinkFPConstantVector(Value *V, bool PreferBFloat)
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Instruction * foldFPtoI(Instruction &FI, InstCombiner &IC)
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(APFloat F, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
SmallDenseMap< Value *, Value *, 8 > EvaluatedMap
static Type * getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC)
Find the minimum FP type we can safely truncate to.
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static Type * shrinkFPConstant(LLVMContext &Ctx, const APFloat &F, bool PreferBFloat)
static Instruction * foldVecExtTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Whenever an element is extracted from a vector, optionally shifted down, and then truncated,...
static Value * EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned, InstCombinerImpl &IC, EvaluatedMap &Processed)
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
Value * RHS
Value * LHS
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:241
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
int32_t exactLogBase2() const
Definition APInt.h:1806
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
This class represents a conversion between pointers from one address space to another.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
BinaryOps getOpcode() const
Definition InstrTypes.h:409
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Type * getSrcTy() const
Return the source type, as a convenience.
Definition InstrTypes.h:679
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:674
static LLVM_ABI unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, const DataLayout *DL)
Determine how a pair of casts can be eliminated, if they can be at all.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static LLVM_ABI CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Type * getDestTy() const
Return the destination type, as a convenience.
Definition InstrTypes.h:681
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValueAPF() const
Definition Constants.h:463
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
bool uge(uint64_t Num) const
This function will return true iff this constant represents a value with active bits bigger than 64 b...
Definition Constants.h:262
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2637
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition IRBuilder.h:534
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:529
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2252
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * foldExtractionOfVectorDeinterleave(ZExtInst &RootZExt)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate type has enou...
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitFPTrunc(FPTruncInst &CI)
Value * foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Instruction * visitPtrToAddr(PtrToAddrInst &CI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
LLVM_ABI bool canBeCastedExactlyIntToFP(Value *V, Type *FPTy, bool IsSigned, const Instruction *CxtI=nullptr) const
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
LLVM_ABI bool isKnownExactCastIntToFP(CastInst &I) const
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool hasNonNeg() const LLVM_READONLY
Determine whether the the nneg flag is set.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents a cast from a pointer to an address (non-capturing ptrtoint).
Value * getPointerOperand()
Gets the pointer operand.
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition Type.h:202
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:287
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:237
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:284
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
This class represents zero extension of integer types.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:237
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_lowbit_mask > m_LowBitMask()
Match an integer or vector with only the low bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrToIntSameSize_match< OpTy > m_PtrToIntSameSize(const DataLayout &DL, const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, UIToFPInst >, CastInst_match< OpTy, SIToFPInst > > m_IToFP(const OpTy &Op)
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_or< CastInst_match< OpTy, FPToUIInst >, CastInst_match< OpTy, FPToSIInst > > m_FPToI(const OpTy &Op)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match a integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_Ctlz(const Opnd0 &Op0, const Opnd1 &Op1)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition MathExtras.h:350
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI Value * simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q)
Given operands for a CastInst, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2440
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
Matching combinators.
SimplifyQuery getWithInstruction(const Instruction *I) const