LLVM 23.0.0git
InstCombineCasts.cpp
Go to the documentation of this file.
1//===- InstCombineCasts.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for cast operations.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/DebugInfo.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Type.h"
26#include "llvm/IR/Value.h"
29#include <iterator>
30#include <optional>
31
32using namespace llvm;
33using namespace PatternMatch;
34
35#define DEBUG_TYPE "instcombine"
36
38
41 EvaluatedMap &Processed) {
42 // Since we cover transformation of instructions with multiple users, we might
43 // come to the same node via multiple paths. We should not create a
44 // replacement for every single one of them though.
45 if (Value *Result = Processed.lookup(V))
46 return Result;
47
50
51 // Otherwise, it must be an instruction.
53 Instruction *Res = nullptr;
54 unsigned Opc = I->getOpcode();
55 switch (Opc) {
56 case Instruction::Add:
57 case Instruction::Sub:
58 case Instruction::Mul:
59 case Instruction::And:
60 case Instruction::Or:
61 case Instruction::Xor:
62 case Instruction::AShr:
63 case Instruction::LShr:
64 case Instruction::Shl:
65 case Instruction::UDiv:
66 case Instruction::URem: {
67 Value *LHS = EvaluateInDifferentTypeImpl(I->getOperand(0), Ty, isSigned, IC,
68 Processed);
69 Value *RHS = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned, IC,
70 Processed);
72 if (Opc == Instruction::LShr || Opc == Instruction::AShr)
73 Res->setIsExact(I->isExact());
74 break;
75 }
76 case Instruction::Trunc:
77 case Instruction::ZExt:
78 case Instruction::SExt:
79 // If the source type of the cast is the type we're trying for then we can
80 // just return the source. There's no need to insert it because it is not
81 // new.
82 if (I->getOperand(0)->getType() == Ty)
83 return I->getOperand(0);
84
85 // Otherwise, must be the same type of cast, so just reinsert a new one.
86 // This also handles the case of zext(trunc(x)) -> zext(x).
87 Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
88 Opc == Instruction::SExt);
89 break;
90 case Instruction::Select: {
91 Value *True = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned,
92 IC, Processed);
93 Value *False = EvaluateInDifferentTypeImpl(I->getOperand(2), Ty, isSigned,
94 IC, Processed);
95 Res = SelectInst::Create(I->getOperand(0), True, False);
96 break;
97 }
98 case Instruction::PHI: {
99 PHINode *OPN = cast<PHINode>(I);
101 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
103 isSigned, IC, Processed);
104 NPN->addIncoming(V, OPN->getIncomingBlock(i));
105 }
106 Res = NPN;
107 break;
108 }
109 case Instruction::FPToUI:
110 case Instruction::FPToSI:
111 Res = CastInst::Create(static_cast<Instruction::CastOps>(Opc),
112 I->getOperand(0), Ty);
113 break;
114 case Instruction::Call:
116 switch (II->getIntrinsicID()) {
117 default:
118 llvm_unreachable("Unsupported call!");
119 case Intrinsic::vscale: {
121 I->getModule(), Intrinsic::vscale, {Ty});
122 Res = CallInst::Create(Fn->getFunctionType(), Fn);
123 break;
124 }
125 }
126 }
127 break;
128 case Instruction::ShuffleVector: {
129 auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
130 auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
131 auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
132 Value *Op0 = EvaluateInDifferentTypeImpl(I->getOperand(0), FixedTy,
133 isSigned, IC, Processed);
134 Value *Op1 = EvaluateInDifferentTypeImpl(I->getOperand(1), FixedTy,
135 isSigned, IC, Processed);
136 Res = new ShuffleVectorInst(Op0, Op1,
137 cast<ShuffleVectorInst>(I)->getShuffleMask());
138 break;
139 }
140 default:
141 // TODO: Can handle more cases here.
142 llvm_unreachable("Unreachable!");
143 }
144
145 Res->takeName(I);
146 Value *Result = IC.InsertNewInstWith(Res, I->getIterator());
147 // There is no need in keeping track of the old value/new value relationship
148 // when we have only one user, we came have here from that user and no-one
149 // else cares.
150 if (!V->hasOneUse())
151 Processed[V] = Result;
152
153 return Result;
154}
155
156/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
157/// true for, actually insert the code to evaluate the expression.
159 bool isSigned) {
160 EvaluatedMap Processed;
161 return EvaluateInDifferentTypeImpl(V, Ty, isSigned, *this, Processed);
162}
163
165InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
166 const CastInst *CI2) {
167 Type *SrcTy = CI1->getSrcTy();
168 Type *MidTy = CI1->getDestTy();
169 Type *DstTy = CI2->getDestTy();
170
171 Instruction::CastOps firstOp = CI1->getOpcode();
172 Instruction::CastOps secondOp = CI2->getOpcode();
173 Type *SrcIntPtrTy =
174 SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
175 Type *DstIntPtrTy =
176 DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
177 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
178 DstTy, &DL);
179
180 // We don't want to form an inttoptr or ptrtoint that converts to an integer
181 // type that differs from the pointer size.
182 if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
183 (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
184 Res = 0;
185
186 return Instruction::CastOps(Res);
187}
188
189/// Implement the transforms common to all CastInst visitors.
191 Value *Src = CI.getOperand(0);
192 Type *Ty = CI.getType();
193
194 if (Value *Res =
195 simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
196 return replaceInstUsesWith(CI, Res);
197
198 // Try to eliminate a cast of a cast.
199 if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
200 if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
201 // The first cast (CSrc) is eliminable so we need to fix up or replace
202 // the second cast (CI). CSrc will then have a good chance of being dead.
203 auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
204 // Point debug users of the dying cast to the new one.
205 if (CSrc->hasOneUse())
206 replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
207 return Res;
208 }
209 }
210
211 if (auto *Sel = dyn_cast<SelectInst>(Src)) {
212 // We are casting a select. Try to fold the cast into the select if the
213 // select does not have a compare instruction with matching operand types
214 // or the select is likely better done in a narrow type.
215 // Creating a select with operands that are different sizes than its
216 // condition may inhibit other folds and lead to worse codegen.
217 auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
218 if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
219 (CI.getOpcode() == Instruction::Trunc &&
220 shouldChangeType(CI.getSrcTy(), CI.getType()))) {
221
222 // If it's a bitcast involving vectors, make sure it has the same number
223 // of elements on both sides.
224 if (CI.getOpcode() != Instruction::BitCast ||
226 if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
227 replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
228 return NV;
229 }
230 }
231 }
232 }
233
234 // If we are casting a PHI, then fold the cast into the PHI.
235 if (auto *PN = dyn_cast<PHINode>(Src)) {
236 // Don't do this if it would create a PHI node with an illegal type from a
237 // legal type.
238 if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
239 shouldChangeType(CI.getSrcTy(), CI.getType()))
240 if (Instruction *NV = foldOpIntoPhi(CI, PN))
241 return NV;
242 }
243
244 // Canonicalize a unary shuffle after the cast if neither operation changes
245 // the size or element size of the input vector.
246 // TODO: We could allow size-changing ops if that doesn't harm codegen.
247 // cast (shuffle X, Mask) --> shuffle (cast X), Mask
248 Value *X;
249 ArrayRef<int> Mask;
250 if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
251 // TODO: Allow scalable vectors?
252 auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
253 auto *DestTy = dyn_cast<FixedVectorType>(Ty);
254 if (SrcTy && DestTy &&
255 SrcTy->getNumElements() == DestTy->getNumElements() &&
256 SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
257 Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
258 return new ShuffleVectorInst(CastX, Mask);
259 }
260 }
261
262 return nullptr;
263}
264
265namespace {
266
267/// Helper class for evaluating whether a value can be computed in a different
268/// type without changing its value. Used by cast simplification transforms.
269class TypeEvaluationHelper {
270public:
271 /// Return true if we can evaluate the specified expression tree as type Ty
272 /// instead of its larger type, and arrive with the same value.
273 /// This is used by code that tries to eliminate truncates.
274 [[nodiscard]] static bool canEvaluateTruncated(Value *V, Type *Ty,
276 Instruction *CxtI);
277
278 /// Determine if the specified value can be computed in the specified wider
279 /// type and produce the same low bits. If not, return false.
280 [[nodiscard]] static bool canEvaluateZExtd(Value *V, Type *Ty,
281 unsigned &BitsToClear,
283 Instruction *CxtI);
284
285 /// Return true if we can take the specified value and return it as type Ty
286 /// without inserting any new casts and without changing the value of the
287 /// common low bits.
288 [[nodiscard]] static bool canEvaluateSExtd(Value *V, Type *Ty);
289
290private:
291 /// Constants and extensions/truncates from the destination type are always
292 /// free to be evaluated in that type.
293 [[nodiscard]] static bool canAlwaysEvaluateInType(Value *V, Type *Ty);
294
295 /// Check if we traversed all the users of the multi-use values we've seen.
296 [[nodiscard]] bool allPendingVisited() const {
297 return llvm::all_of(Pending,
298 [this](Value *V) { return Visited.contains(V); });
299 }
300
301 /// A generic wrapper for canEvaluate* recursions to inject visitation
302 /// tracking and enforce correct multi-use value evaluations.
303 [[nodiscard]] bool
304 canEvaluate(Value *V, Type *Ty,
305 llvm::function_ref<bool(Value *, Type *Type)> Pred) {
306 if (canAlwaysEvaluateInType(V, Ty))
307 return true;
308
309 auto *I = dyn_cast<Instruction>(V);
310
311 if (I == nullptr)
312 return false;
313
314 // We insert false by default to return false when we encounter user loops.
315 const auto [It, Inserted] = Visited.insert({V, false});
316
317 // There are three possible cases for us having information on this value
318 // in the Visited map:
319 // 1. We properly checked it and concluded that we can evaluate it (true)
320 // 2. We properly checked it and concluded that we can't (false)
321 // 3. We started to check it, but during the recursive traversal we came
322 // back to it.
323 //
324 // For cases 1 and 2, we can safely return the stored result. For case 3, we
325 // can potentially have a situation where we can evaluate recursive user
326 // chains, but that can be quite tricky to do properly and isntead, we
327 // return false.
328 //
329 // In any case, we should return whatever was there in the map to begin
330 // with.
331 if (!Inserted)
332 return It->getSecond();
333
334 // We can easily make a decision about single-user values whether they can
335 // be evaluated in a different type or not, we came from that user. This is
336 // not as simple for multi-user values.
337 //
338 // In general, we have the following case (inverted control-flow, users are
339 // at the top):
340 //
341 // Cast %A
342 // ____|
343 // /
344 // %A = Use %B, %C
345 // ________| |
346 // / |
347 // %B = Use %D |
348 // ________| |
349 // / |
350 // %D = Use %C |
351 // ________|___|
352 // /
353 // %C = ...
354 //
355 // In this case, when we check %A, %B and %D, we are confident that we can
356 // make the decision here and now, since we came from their only users.
357 //
358 // For %C, it is harder. We come there twice, and when we come the first
359 // time, it's hard to tell if we will visit the second user (technically
360 // it's not hard, but we might need a lot of repetitive checks with non-zero
361 // cost).
362 //
363 // In the case above, we are allowed to evaluate %C in different type
364 // because all of it users were part of the traversal.
365 //
366 // In the following case, however, we can't make this conclusion:
367 //
368 // Cast %A
369 // ____|
370 // /
371 // %A = Use %B, %C
372 // ________| |
373 // / |
374 // %B = Use %D |
375 // ________| |
376 // / |
377 // %D = Use %C |
378 // | |
379 // foo(%C) | | <- never traversing foo(%C)
380 // ________|___|
381 // /
382 // %C = ...
383 //
384 // In this case, we still can evaluate %C in a different type, but we'd need
385 // to create a copy of the original %C to be used in foo(%C). Such
386 // duplication might be not profitable.
387 //
388 // For this reason, we collect all users of the mult-user values and mark
389 // them as "pending" and defer this decision to the very end. When we are
390 // done and and ready to have a positive verdict, we should double-check all
391 // of the pending users and ensure that we visited them. allPendingVisited
392 // predicate checks exactly that.
393 if (!I->hasOneUse())
394 llvm::append_range(Pending, I->users());
395
396 const bool Result = Pred(V, Ty);
397 // We have to set result this way and not via It because Pred is recursive
398 // and it is very likely that we grew Visited and invalidated It.
399 Visited[V] = Result;
400 return Result;
401 }
402
403 /// Filter out values that we can not evaluate in the destination type for
404 /// free.
405 [[nodiscard]] bool canNotEvaluateInType(Value *V, Type *Ty);
406
407 [[nodiscard]] bool canEvaluateTruncatedImpl(Value *V, Type *Ty,
408 InstCombinerImpl &IC,
409 Instruction *CxtI);
410 [[nodiscard]] bool canEvaluateTruncatedPred(Value *V, Type *Ty,
411 InstCombinerImpl &IC,
412 Instruction *CxtI);
413 [[nodiscard]] bool canEvaluateZExtdImpl(Value *V, Type *Ty,
414 unsigned &BitsToClear,
415 InstCombinerImpl &IC,
416 Instruction *CxtI);
417 [[nodiscard]] bool canEvaluateSExtdImpl(Value *V, Type *Ty);
418 [[nodiscard]] bool canEvaluateSExtdPred(Value *V, Type *Ty);
419
420 /// A bookkeeping map to memorize an already made decision for a traversed
421 /// value.
422 SmallDenseMap<Value *, bool, 8> Visited;
423
424 /// A list of pending values to check in the end.
425 SmallVector<Value *, 8> Pending;
426};
427
428} // anonymous namespace
429
430/// Constants and extensions/truncates from the destination type are always
431/// free to be evaluated in that type. This is a helper for canEvaluate*.
432bool TypeEvaluationHelper::canAlwaysEvaluateInType(Value *V, Type *Ty) {
433 if (isa<Constant>(V))
434 return match(V, m_ImmConstant());
435
436 Value *X;
437 if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
438 X->getType() == Ty)
439 return true;
440
441 return false;
442}
443
444/// Filter out values that we can not evaluate in the destination type for free.
445/// This is a helper for canEvaluate*.
446bool TypeEvaluationHelper::canNotEvaluateInType(Value *V, Type *Ty) {
447 if (!isa<Instruction>(V))
448 return true;
449 // We don't extend or shrink something that has multiple uses -- doing so
450 // would require duplicating the instruction which isn't profitable.
451 if (!V->hasOneUse())
452 return true;
453
454 return false;
455}
456
457/// Return true if we can evaluate the specified expression tree as type Ty
458/// instead of its larger type, and arrive with the same value.
459/// This is used by code that tries to eliminate truncates.
460///
461/// Ty will always be a type smaller than V. We should return true if trunc(V)
462/// can be computed by computing V in the smaller type. If V is an instruction,
463/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
464/// makes sense if x and y can be efficiently truncated.
465///
466/// This function works on both vectors and scalars.
467///
468bool TypeEvaluationHelper::canEvaluateTruncated(Value *V, Type *Ty,
470 Instruction *CxtI) {
471 TypeEvaluationHelper TYH;
472 return TYH.canEvaluateTruncatedImpl(V, Ty, IC, CxtI) &&
473 // We need to check whether we visited all users of multi-user values,
474 // and we have to do it at the very end, outside of the recursion.
475 TYH.allPendingVisited();
476}
477
478bool TypeEvaluationHelper::canEvaluateTruncatedImpl(Value *V, Type *Ty,
480 Instruction *CxtI) {
481 return canEvaluate(V, Ty, [this, &IC, CxtI](Value *V, Type *Ty) {
482 return canEvaluateTruncatedPred(V, Ty, IC, CxtI);
483 });
484}
485
486bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value *V, Type *Ty,
488 Instruction *CxtI) {
489 auto *I = cast<Instruction>(V);
490 Type *OrigTy = V->getType();
491 switch (I->getOpcode()) {
492 case Instruction::Add:
493 case Instruction::Sub:
494 case Instruction::Mul:
495 case Instruction::And:
496 case Instruction::Or:
497 case Instruction::Xor:
498 // These operators can all arbitrarily be extended or truncated.
499 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
500 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
501
502 case Instruction::UDiv:
503 case Instruction::URem: {
504 // UDiv and URem can be truncated if all the truncated bits are zero.
505 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
506 uint32_t BitWidth = Ty->getScalarSizeInBits();
507 assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
508 APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
509 // Do not preserve the original context instruction. Simplifying div/rem
510 // based on later context may introduce a trap.
511 if (IC.MaskedValueIsZero(I->getOperand(0), Mask, I) &&
512 IC.MaskedValueIsZero(I->getOperand(1), Mask, I)) {
513 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
514 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
515 }
516 break;
517 }
518 case Instruction::Shl: {
519 // If we are truncating the result of this SHL, and if it's a shift of an
520 // inrange amount, we can always perform a SHL in a smaller type.
521 uint32_t BitWidth = Ty->getScalarSizeInBits();
522 KnownBits AmtKnownBits =
523 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
524 if (AmtKnownBits.getMaxValue().ult(BitWidth))
525 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
526 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
527 break;
528 }
529 case Instruction::LShr: {
530 // If this is a truncate of a logical shr, we can truncate it to a smaller
531 // lshr iff we know that the bits we would otherwise be shifting in are
532 // already zeros.
533 // TODO: It is enough to check that the bits we would be shifting in are
534 // zero - use AmtKnownBits.getMaxValue().
535 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
536 uint32_t BitWidth = Ty->getScalarSizeInBits();
537 KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), CxtI);
538 APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
539 APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
540 if (MaxShiftAmt.ult(BitWidth)) {
541 // If the only user is a trunc then we can narrow the shift if any new
542 // MSBs are not going to be used.
543 if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
544 auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
545 if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
546 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
547 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
548 }
549 if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, CxtI))
550 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
551 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
552 }
553 break;
554 }
555 case Instruction::AShr: {
556 // If this is a truncate of an arithmetic shr, we can truncate it to a
557 // smaller ashr iff we know that all the bits from the sign bit of the
558 // original type and the sign bit of the truncate type are similar.
559 // TODO: It is enough to check that the bits we would be shifting in are
560 // similar to sign bit of the truncate type.
561 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
562 uint32_t BitWidth = Ty->getScalarSizeInBits();
563 KnownBits AmtKnownBits =
564 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
565 unsigned ShiftedBits = OrigBitWidth - BitWidth;
566 if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
567 ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), CxtI))
568 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
569 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
570 break;
571 }
572 case Instruction::Trunc:
573 // trunc(trunc(x)) -> trunc(x)
574 return true;
575 case Instruction::ZExt:
576 case Instruction::SExt:
577 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
578 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
579 return true;
580 case Instruction::Select: {
582 return canEvaluateTruncatedImpl(SI->getTrueValue(), Ty, IC, CxtI) &&
583 canEvaluateTruncatedImpl(SI->getFalseValue(), Ty, IC, CxtI);
584 }
585 case Instruction::PHI: {
586 // We can change a phi if we can change all operands. Note that we never
587 // get into trouble with cyclic PHIs here because canEvaluate handles use
588 // chain loops.
589 PHINode *PN = cast<PHINode>(I);
590 return llvm::all_of(
591 PN->incoming_values(), [this, Ty, &IC, CxtI](Value *IncValue) {
592 return canEvaluateTruncatedImpl(IncValue, Ty, IC, CxtI);
593 });
594 }
595 case Instruction::FPToUI:
596 case Instruction::FPToSI: {
597 // If the integer type can hold the max FP value, it is safe to cast
598 // directly to that type. Otherwise, we may create poison via overflow
599 // that did not exist in the original code.
600 Type *InputTy = I->getOperand(0)->getType()->getScalarType();
601 const fltSemantics &Semantics = InputTy->getFltSemantics();
602 uint32_t MinBitWidth = APFloatBase::semanticsIntSizeInBits(
603 Semantics, I->getOpcode() == Instruction::FPToSI);
604 return Ty->getScalarSizeInBits() >= MinBitWidth;
605 }
606 case Instruction::ShuffleVector:
607 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
608 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
609
610 default:
611 // TODO: Can handle more cases here.
612 break;
613 }
614
615 return false;
616}
617
618/// Given a vector that is bitcast to an integer, optionally logically
619/// right-shifted, and truncated, convert it to an extractelement.
620/// Example (big endian):
621/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
622/// --->
623/// extractelement <4 x i32> %X, 1
625 InstCombinerImpl &IC) {
626 Value *TruncOp = Trunc.getOperand(0);
627 Type *DestType = Trunc.getType();
628 if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
629 return nullptr;
630
631 Value *VecInput = nullptr;
632 ConstantInt *ShiftVal = nullptr;
633 if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
634 m_LShr(m_BitCast(m_Value(VecInput)),
635 m_ConstantInt(ShiftVal)))) ||
636 !isa<VectorType>(VecInput->getType()))
637 return nullptr;
638
639 VectorType *VecType = cast<VectorType>(VecInput->getType());
640 unsigned VecWidth = VecType->getPrimitiveSizeInBits();
641 unsigned DestWidth = DestType->getPrimitiveSizeInBits();
642 unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
643
644 if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
645 return nullptr;
646
647 // If the element type of the vector doesn't match the result type,
648 // bitcast it to a vector type that we can extract from.
649 unsigned NumVecElts = VecWidth / DestWidth;
650 if (VecType->getElementType() != DestType) {
651 VecType = FixedVectorType::get(DestType, NumVecElts);
652 VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
653 }
654
655 unsigned Elt = ShiftAmount / DestWidth;
656 if (IC.getDataLayout().isBigEndian())
657 Elt = NumVecElts - 1 - Elt;
658
659 return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
660}
661
662/// Whenever an element is extracted from a vector, optionally shifted down, and
663/// then truncated, canonicalize by converting it to a bitcast followed by an
664/// extractelement.
665///
666/// Examples (little endian):
667/// trunc (extractelement <4 x i64> %X, 0) to i32
668/// --->
669/// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
670///
671/// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
672/// --->
673/// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
675 InstCombinerImpl &IC) {
676 Value *Src = Trunc.getOperand(0);
677 Type *SrcType = Src->getType();
678 Type *DstType = Trunc.getType();
679
680 // Only attempt this if we have simple aliasing of the vector elements.
681 // A badly fit destination size would result in an invalid cast.
682 unsigned SrcBits = SrcType->getScalarSizeInBits();
683 unsigned DstBits = DstType->getScalarSizeInBits();
684 unsigned TruncRatio = SrcBits / DstBits;
685 if ((SrcBits % DstBits) != 0)
686 return nullptr;
687
688 Value *VecOp;
689 ConstantInt *Cst;
690 const APInt *ShiftAmount = nullptr;
691 if (!match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)))) &&
692 !match(Src,
694 m_APInt(ShiftAmount)))))
695 return nullptr;
696
697 auto *VecOpTy = cast<VectorType>(VecOp->getType());
698 auto VecElts = VecOpTy->getElementCount();
699
700 uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
701 // Make sure we don't overflow in the calculation of the new index.
702 // (VecOpIdx + 1) * TruncRatio should not overflow.
703 if (Cst->uge(std::numeric_limits<uint64_t>::max() / TruncRatio))
704 return nullptr;
705 uint64_t VecOpIdx = Cst->getZExtValue();
706 uint64_t NewIdx = IC.getDataLayout().isBigEndian()
707 ? (VecOpIdx + 1) * TruncRatio - 1
708 : VecOpIdx * TruncRatio;
709
710 // Adjust index by the whole number of truncated elements.
711 if (ShiftAmount) {
712 // Check shift amount is in range and shifts a whole number of truncated
713 // elements.
714 if (ShiftAmount->uge(SrcBits) || ShiftAmount->urem(DstBits) != 0)
715 return nullptr;
716
717 uint64_t IdxOfs = ShiftAmount->udiv(DstBits).getZExtValue();
718 // IdxOfs is guaranteed to be less than TruncRatio, so we won't overflow in
719 // the adjustment.
720 assert(IdxOfs < TruncRatio &&
721 "IdxOfs is expected to be less than TruncRatio.");
722 NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
723 : (NewIdx + IdxOfs);
724 }
725
726 assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
727 "overflow 32-bits");
728
729 auto *BitCastTo =
730 VectorType::get(DstType, BitCastNumElts, VecElts.isScalable());
731 Value *BitCast = IC.Builder.CreateBitCast(VecOp, BitCastTo);
732 return ExtractElementInst::Create(BitCast, IC.Builder.getInt64(NewIdx));
733}
734
735/// Funnel/Rotate left/right may occur in a wider type than necessary because of
736/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
737Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
738 assert((isa<VectorType>(Trunc.getSrcTy()) ||
739 shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
740 "Don't narrow to an illegal scalar type");
741
742 // Bail out on strange types. It is possible to handle some of these patterns
743 // even with non-power-of-2 sizes, but it is not a likely scenario.
744 Type *DestTy = Trunc.getType();
745 unsigned NarrowWidth = DestTy->getScalarSizeInBits();
746 unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
747 if (!isPowerOf2_32(NarrowWidth))
748 return nullptr;
749
750 // First, find an or'd pair of opposite shifts:
751 // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
752 BinaryOperator *Or0, *Or1;
753 if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
754 return nullptr;
755
756 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
757 if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
758 !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
759 Or0->getOpcode() == Or1->getOpcode())
760 return nullptr;
761
762 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
763 if (Or0->getOpcode() == BinaryOperator::LShr) {
764 std::swap(Or0, Or1);
765 std::swap(ShVal0, ShVal1);
766 std::swap(ShAmt0, ShAmt1);
767 }
768 assert(Or0->getOpcode() == BinaryOperator::Shl &&
769 Or1->getOpcode() == BinaryOperator::LShr &&
770 "Illegal or(shift,shift) pair");
771
772 // Match the shift amount operands for a funnel/rotate pattern. This always
773 // matches a subtraction on the R operand.
774 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
775 // The shift amounts may add up to the narrow bit width:
776 // (shl ShVal0, L) | (lshr ShVal1, Width - L)
777 // If this is a funnel shift (different operands are shifted), then the
778 // shift amount can not over-shift (create poison) in the narrow type.
779 unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
780 APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
781 if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
782 if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
783 return L;
784
785 // The following patterns currently only work for rotation patterns.
786 // TODO: Add more general funnel-shift compatible patterns.
787 if (ShVal0 != ShVal1)
788 return nullptr;
789
790 // The shift amount may be masked with negation:
791 // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
792 Value *X;
793 unsigned Mask = Width - 1;
794 if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
796 return X;
797
798 // Same as above, but the shift amount may be extended after masking:
799 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
801 return X;
802
803 return nullptr;
804 };
805
806 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
807 bool IsFshl = true; // Sub on LSHR.
808 if (!ShAmt) {
809 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
810 IsFshl = false; // Sub on SHL.
811 }
812 if (!ShAmt)
813 return nullptr;
814
815 // The right-shifted value must have high zeros in the wide type (for example
816 // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
817 // truncated, so those do not matter.
818 APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
819 if (!MaskedValueIsZero(ShVal1, HiBitMask, &Trunc))
820 return nullptr;
821
822 // Adjust the width of ShAmt for narrowed funnel shift operation:
823 // - Zero-extend if ShAmt is narrower than the destination type.
824 // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
825 // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
826 // zext/trunc(ShAmt)).
827 Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);
828
829 Value *X, *Y;
830 X = Y = Builder.CreateTrunc(ShVal0, DestTy);
831 if (ShVal0 != ShVal1)
832 Y = Builder.CreateTrunc(ShVal1, DestTy);
833 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
834 Function *F =
835 Intrinsic::getOrInsertDeclaration(Trunc.getModule(), IID, DestTy);
836 return CallInst::Create(F, {X, Y, NarrowShAmt});
837}
838
839/// Try to narrow the width of math or bitwise logic instructions by pulling a
840/// truncate ahead of binary operators.
841Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
842 Type *SrcTy = Trunc.getSrcTy();
843 Type *DestTy = Trunc.getType();
844 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
845 unsigned DestWidth = DestTy->getScalarSizeInBits();
846
847 if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
848 return nullptr;
849
850 BinaryOperator *BinOp;
851 if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
852 return nullptr;
853
854 Value *BinOp0 = BinOp->getOperand(0);
855 Value *BinOp1 = BinOp->getOperand(1);
856 switch (BinOp->getOpcode()) {
857 case Instruction::And:
858 case Instruction::Or:
859 case Instruction::Xor:
860 case Instruction::Add:
861 case Instruction::Sub:
862 case Instruction::Mul: {
863 Constant *C;
864 if (match(BinOp0, m_Constant(C))) {
865 // trunc (binop C, X) --> binop (trunc C', X)
866 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
867 Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
868 return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
869 }
870 if (match(BinOp1, m_Constant(C))) {
871 // trunc (binop X, C) --> binop (trunc X, C')
872 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
873 Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
874 return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
875 }
876 Value *X;
877 if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
878 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
879 Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
880 return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
881 }
882 if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
883 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
884 Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
885 return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
886 }
887 break;
888 }
889 case Instruction::LShr:
890 case Instruction::AShr: {
891 // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
892 Value *A;
893 Constant *C;
894 if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
895 unsigned MaxShiftAmt = SrcWidth - DestWidth;
896 // If the shift is small enough, all zero/sign bits created by the shift
897 // are removed by the trunc.
899 APInt(SrcWidth, MaxShiftAmt)))) {
900 auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
901 bool IsExact = OldShift->isExact();
902 if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
903 /*IsSigned*/ true, DL)) {
904 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
905 Value *Shift =
906 OldShift->getOpcode() == Instruction::AShr
907 ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
908 : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
909 return CastInst::CreateTruncOrBitCast(Shift, DestTy);
910 }
911 }
912 }
913 break;
914 }
915 default: break;
916 }
917
918 if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
919 return NarrowOr;
920
921 return nullptr;
922}
923
924/// Try to narrow the width of a splat shuffle. This could be generalized to any
925/// shuffle with a constant operand, but we limit the transform to avoid
926/// creating a shuffle type that targets may not be able to lower effectively.
928 InstCombiner::BuilderTy &Builder) {
929 auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
930 if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
931 all_equal(Shuf->getShuffleMask()) &&
932 ElementCount::isKnownGE(Shuf->getType()->getElementCount(),
933 cast<VectorType>(Shuf->getOperand(0)->getType())
934 ->getElementCount())) {
935 // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
936 // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
937 Type *NewTruncTy = Shuf->getOperand(0)->getType()->getWithNewType(
938 Trunc.getType()->getScalarType());
939 Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), NewTruncTy);
940 return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
941 }
942
943 return nullptr;
944}
945
946/// Try to narrow the width of an insert element. This could be generalized for
947/// any vector constant, but we limit the transform to insertion into undef to
948/// avoid potential backend problems from unsupported insertion widths. This
949/// could also be extended to handle the case of inserting a scalar constant
950/// into a vector variable.
952 InstCombiner::BuilderTy &Builder) {
953 Instruction::CastOps Opcode = Trunc.getOpcode();
954 assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
955 "Unexpected instruction for shrinking");
956
957 auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
958 if (!InsElt || !InsElt->hasOneUse())
959 return nullptr;
960
961 Type *DestTy = Trunc.getType();
962 Type *DestScalarTy = DestTy->getScalarType();
963 Value *VecOp = InsElt->getOperand(0);
964 Value *ScalarOp = InsElt->getOperand(1);
965 Value *Index = InsElt->getOperand(2);
966
967 if (match(VecOp, m_Undef())) {
968 // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
969 // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
970 UndefValue *NarrowUndef = UndefValue::get(DestTy);
971 Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
972 return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
973 }
974
975 return nullptr;
976}
977
  // NOTE(review): the defining `visitTrunc(TruncInst &Trunc)` signature line is
  // elided in this copy of the file (extraction artifact); this is the body of
  // InstCombinerImpl::visitTrunc. Several matcher lines below are likewise
  // elided and are flagged individually.
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type.   Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      TypeEvaluationHelper::canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluting in a different type always
    // eliminates the cast, so it is always a win.
    // NOTE(review): the `LLVM_DEBUG(` opener is elided in this copy.
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          TypeEvaluationHelper::canEvaluateTruncated(Src, NewDestTy, *this,
                                                     &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  // NOTE(review): the guarding `if (...)` line (demanded-bits simplification)
  // is elided in this copy.
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);

    Value *X;
    const APInt *C1;
    Constant *C2;
    if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
                                  m_ImmConstant(C2))))) {
      // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
      Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
      Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
      return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
    }

    if (match(Src, m_Shr(m_Value(X), m_SpecificInt(SrcWidth - 1)))) {
      // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
      // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
      return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero);
    }

    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Value *MaskC = Builder.CreateShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    // NOTE(review): the `if (match(Src, m_OneUse(m_c_Or(m_LShr(...)` opener of
    // this fold is elided in this copy; only its continuation survives.
                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Value *MaskC = Builder.CreateShl(One, C);
      Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    {
      const APInt *C;
      if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
        // trunc (C << X) to i1 --> X == 0, where C is odd
        return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
      }
    }

    if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
      // With nuw/nsw to i1, the operands of an xor must have been equal or
      // differ only in bit 0, so the trunc is equivalent to an inequality.
      Value *X, *Y;
      if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
        return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
    }

    // NOTE(review): this fold's matcher line and the icmp's second argument
    // line are elided in this copy; only fragments survive.
    if (match(Src,
      return new ICmpInst(ICmpInst::ICMP_EQ, X,
  }

  Value *A, *B;
  Constant *C;

  // trunc(u/smin(zext(a) + zext(b), MAX)) --> uadd.sat(a, b)
  // NOTE(review): the m_*Min/m_Add matcher lines of this fold are elided in
  // this copy; only the m_SpecificInt continuations survive.
  if (match(Src,
                m_SpecificInt(APInt::getMaxValue(DestWidth))),
                m_SpecificInt(APInt::getMaxValue(DestWidth)))))) &&
      A->getType() == DestTy && B->getType() == DestTy) {
    return replaceInstUsesWith(
        Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, A, B));
  }

  // trunc(smax(zext(a) - zext(b), 0)) --> usub.sat(a, b)
  // NOTE(review): the m_Sub(m_ZExt(...), m_ZExt(...)) matcher line is elided
  // in this copy.
  if (match(Src, m_OneUse(m_SMax(
                     m_Zero()))) &&
      A->getType() == DestTy && B->getType() == DestTy) {
    return replaceInstUsesWith(
        Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B));
  }

  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    // NOTE(review): the `if (match(C, m_SpecificInt_ICMP(...` opener of this
    // condition is elided in this copy.
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      // Clamp the shift amount to Width-1 and retype it for the narrow shift.
      auto GetNewShAmt = [&](unsigned Width) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
        Constant *Cmp =
        // NOTE(review): the folded-compare call feeding `Cmp` is elided in
        // this copy.
        Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
        return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
                                       DL);
      };

      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *ShAmt = GetNewShAmt(DestWidth);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *ShAmt = GetNewShAmt(AWidth);
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  // trunc (select(icmp_ult(A, DestTy_umax+1), A, sext(icmp_sgt(A, 0)))) -->
  // trunc (smin(smax(0, A), DestTy_umax))
  // NOTE(review): three matcher lines of this m_Select pattern are elided in
  // this copy; only fragments survive.
  if (SrcTy->isIntegerTy() && isPowerOf2_64(SrcTy->getPrimitiveSizeInBits()) &&
      match(Src, m_OneUse(m_Select(
                     m_Constant(C))),
                 m_Deferred(A),
                     ICmpInst::ICMP_SGT, m_Deferred(A), m_Zero())))))))) {
    APInt UpperBound = C->getUniqueInteger();
    APInt TruncatedMax = APInt::getAllOnes(DestTy->getIntegerBitWidth());
    TruncatedMax = TruncatedMax.zext(UpperBound.getBitWidth());
    // Only fires when the select's bound is exactly DestTy_umax + 1.
    if (!UpperBound.isZero() && UpperBound - 1 == TruncatedMax) {
      Value *SMax = Builder.CreateIntrinsic(Intrinsic::smax, {SrcTy},
                                            {ConstantInt::get(SrcTy, 0), A});
      Value *SMin = Builder.CreateIntrinsic(
          Intrinsic::smin, {SrcTy},
          {SMax, ConstantInt::get(SrcTy, TruncatedMax)});
      return new TruncInst(SMin, DestTy);
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  if (Instruction *I = foldVecExtTruncToExtElt(Trunc, *this))
    return I;

  // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Ctlz(m_ZExt(m_Value(A)), m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    // AWidth > Log2_32(SrcWidth) guarantees the narrow ctlz result plus the
    // width difference cannot overflow the narrow type.
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale())) {
    // If vscale_range bounds vscale below 2^DestWidth, the trunc is lossless
    // and we can emit vscale directly at the narrow type.
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
        if (Log2_32(*MaxVScale) < DestWidth)
          return replaceInstUsesWith(Trunc, Builder.CreateVScale(DestTy));
    }
  }

  // A known-nonzero value truncated to i1 with nuw/nsw must be 1.
  if (DestWidth == 1 &&
      (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) &&
      isKnownNonZero(Src, SQ.getWithInstruction(&Trunc)))
    return replaceInstUsesWith(Trunc, ConstantInt::getTrue(DestTy));

  // Infer nsw/nuw flags from value analysis; report the instruction as
  // changed so the worklist revisits it.
  bool Changed = false;
  if (!Trunc.hasNoSignedWrap() &&
      ComputeMaxSignificantBits(Src, &Trunc) <= DestWidth) {
    Trunc.setHasNoSignedWrap(true);
    Changed = true;
  }
  if (!Trunc.hasNoUnsignedWrap() &&
      MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
                        &Trunc)) {
    Trunc.setHasNoUnsignedWrap(true);
    Changed = true;
  }

  const APInt *C1;
  Value *V1;
  // OP = { lshr, ashr }
  // trunc ( OP i8 C1, V1) to i1 -> icmp eq V1, log_2(C1) iff C1 is power of 2
  if (DestWidth == 1 && match(Src, m_Shr(m_Power2(C1), m_Value(V1)))) {
    Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
    return new ICmpInst(ICmpInst::ICMP_EQ, V1, Right);
  }

  // OP = { lshr, ashr }
  // trunc ( OP i8 C1, V1) to i1 -> icmp ult V1, log_2(C1 + 1) iff (C1 + 1) is
  // power of 2
  if (DestWidth == 1 && match(Src, m_Shr(m_LowBitMask(C1), m_Value(V1)))) {
    Value *Right = ConstantInt::get(V1->getType(), C1->countr_one());
    return new ICmpInst(ICmpInst::ICMP_ULT, V1, Right);
  }

  // OP = { lshr, ashr }
  // trunc ( OP i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is
  // negative power of 2
  if (DestWidth == 1 && match(Src, m_Shr(m_NegatedPower2(C1), m_Value(V1)))) {
    Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
    return new ICmpInst(ICmpInst::ICMP_UGE, V1, Right);
  }

  return Changed ? &Trunc : nullptr;
}
1270
/// Transform zext(icmp ...) into bitwise/integer operations (shift, xor, and)
/// to eliminate the comparison where a single bit determines the result.
/// Returns a replacement value via replaceInstUsesWith, or nullptr if no
/// transform applies.
Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
                                                 ZExtInst &Zext) {
  // If we are just checking for a icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.

  // FIXME: This set of transforms does not check for extra uses and/or creates
  //        an extra instruction (an optional final cast is not included
  //        in the transform comments). We may also want to favor icmp over
  //        shifts in cases of equal instructions because icmp has better
  //        analysis in general (invert the transform).

  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
      Value *In = Cmp->getOperand(0);
      // Logical shift by BW-1 leaves just the sign bit in bit 0.
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.

    if (Op1CV->isZero() && Cmp->isEquality()) {
      // Exactly 1 possible 1? But not the high-bit because that is
      // canonicalized to this form.
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), &Zext);
      // KnownZeroMask holds the bits that may be nonzero; a power of 2 means
      // only one bit of the operand can ever be set.
      APInt KnownZeroMask(~Known.Zero);
      uint32_t ShAmt = KnownZeroMask.logBase2();
      bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
                           (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
      if (IsExpectShAmt &&
          (Cmp->getOperand(0)->getType() == Zext.getType() ||
           Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        // Toggle the low bit for "X == 0".
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
      // The shl operand is whichever side of the commutative and X is not.
      Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
      // The eq form adds a 'not', so require it to not increase the total
      // instruction count unless the shift itself dies.
      if (Zext.getType() == And->getType() ||
          Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          X = Builder.CreateNot(X);
        Value *Lshr = Builder.CreateLShr(X, ShAmt);
        Value *And1 =
            Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
        return replaceInstUsesWith(
            Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
      }
    }
  }

  return nullptr;
}
1360
1361/// Determine if the specified value can be computed in the specified wider type
1362/// and produce the same low bits. If not, return false.
1363///
1364/// If this function returns true, it can also return a non-zero number of bits
1365/// (in BitsToClear) which indicates that the value it computes is correct for
1366/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1367/// out. For example, to promote something like:
1368///
1369/// %B = trunc i64 %A to i32
1370/// %C = lshr i32 %B, 8
1371/// %E = zext i32 %C to i64
1372///
1373/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1374/// set to 8 to indicate that the promoted value needs to have bits 24-31
1375/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1376/// clear the top bits anyway, doing this has no extra cost.
1377///
1378/// This function works on both vectors and scalars.
1379bool TypeEvaluationHelper::canEvaluateZExtd(Value *V, Type *Ty,
1380 unsigned &BitsToClear,
1381 InstCombinerImpl &IC,
1382 Instruction *CxtI) {
1383 TypeEvaluationHelper TYH;
1384 return TYH.canEvaluateZExtdImpl(V, Ty, BitsToClear, IC, CxtI);
1385}
1386bool TypeEvaluationHelper::canEvaluateZExtdImpl(Value *V, Type *Ty,
1387 unsigned &BitsToClear,
1388 InstCombinerImpl &IC,
1389 Instruction *CxtI) {
1390 BitsToClear = 0;
1391 if (canAlwaysEvaluateInType(V, Ty))
1392 return true;
1393 // We stick to the one-user limit for the ZExt transform due to the fact
1394 // that this predicate returns two values: predicate result and BitsToClear.
1395 if (canNotEvaluateInType(V, Ty))
1396 return false;
1397
1398 auto *I = cast<Instruction>(V);
1399 unsigned Tmp;
1400 switch (I->getOpcode()) {
1401 case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1402 case Instruction::SExt: // zext(sext(x)) -> sext(x).
1403 case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1404 return true;
1405 case Instruction::And:
1406 case Instruction::Or:
1407 case Instruction::Xor:
1408 case Instruction::Add:
1409 case Instruction::Sub:
1410 case Instruction::Mul:
1411 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
1412 !canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI))
1413 return false;
1414 // These can all be promoted if neither operand has 'bits to clear'.
1415 if (BitsToClear == 0 && Tmp == 0)
1416 return true;
1417
1418 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1419 // other side, BitsToClear is ok.
1420 if (Tmp == 0 && I->isBitwiseLogicOp()) {
1421 // We use MaskedValueIsZero here for generality, but the case we care
1422 // about the most is constant RHS.
1423 unsigned VSize = V->getType()->getScalarSizeInBits();
1424 if (IC.MaskedValueIsZero(I->getOperand(1),
1425 APInt::getHighBitsSet(VSize, BitsToClear),
1426 CxtI)) {
1427 // If this is an And instruction and all of the BitsToClear are
1428 // known to be zero we can reset BitsToClear.
1429 if (I->getOpcode() == Instruction::And)
1430 BitsToClear = 0;
1431 return true;
1432 }
1433 }
1434
1435 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1436 return false;
1437
1438 case Instruction::Shl: {
1439 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1440 // upper bits we can reduce BitsToClear by the shift amount.
1441 uint64_t ShiftAmt;
1442 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1443 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1444 return false;
1445 BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1446 return true;
1447 }
1448 return false;
1449 }
1450 case Instruction::LShr: {
1451 // We can promote lshr(x, cst) if we can promote x. This requires the
1452 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1453 uint64_t ShiftAmt;
1454 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1455 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1456 return false;
1457 BitsToClear += ShiftAmt;
1458 if (BitsToClear > V->getType()->getScalarSizeInBits())
1459 BitsToClear = V->getType()->getScalarSizeInBits();
1460 return true;
1461 }
1462 // Cannot promote variable LSHR.
1463 return false;
1464 }
1465 case Instruction::Select:
1466 if (!canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
1467 !canEvaluateZExtdImpl(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
1468 // TODO: If important, we could handle the case when the BitsToClear are
1469 // known zero in the disagreeing side.
1470 Tmp != BitsToClear)
1471 return false;
1472 return true;
1473
1474 case Instruction::PHI: {
1475 // We can change a phi if we can change all operands. Note that we never
1476 // get into trouble with cyclic PHIs here because we only consider
1477 // instructions with a single use.
1478 PHINode *PN = cast<PHINode>(I);
1479 if (!canEvaluateZExtdImpl(PN->getIncomingValue(0), Ty, BitsToClear, IC,
1480 CxtI))
1481 return false;
1482 for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1483 if (!canEvaluateZExtdImpl(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
1484 // TODO: If important, we could handle the case when the BitsToClear
1485 // are known zero in the disagreeing input.
1486 Tmp != BitsToClear)
1487 return false;
1488 return true;
1489 }
1490 case Instruction::Call:
1491 // llvm.vscale() can always be executed in larger type, because the
1492 // value is automatically zero-extended.
1494 if (II->getIntrinsicID() == Intrinsic::vscale)
1495 return true;
1496 return false;
1497 default:
1498 // TODO: Can handle more cases here.
1499 return false;
1500 }
1501}
1502
1504 // If this zero extend is only used by a truncate, let the truncate be
1505 // eliminated before we try to optimize this zext.
1506 if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
1507 !isa<Constant>(Zext.getOperand(0)))
1508 return nullptr;
1509
1510 // If one of the common conversion will work, do it.
1511 if (Instruction *Result = commonCastTransforms(Zext))
1512 return Result;
1513
1514 Value *Src = Zext.getOperand(0);
1515 Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
1516
1517 // zext nneg bool x -> 0
1518 if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
1520
1521 // Try to extend the entire expression tree to the wide destination type.
1522 unsigned BitsToClear;
1523 if (shouldChangeType(SrcTy, DestTy) &&
1524 TypeEvaluationHelper::canEvaluateZExtd(Src, DestTy, BitsToClear, *this,
1525 &Zext)) {
1526 assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1527 "Can't clear more bits than in SrcTy");
1528
1529 // Okay, we can transform this! Insert the new expression now.
1530 LLVM_DEBUG(
1531 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1532 " to avoid zero extend: "
1533 << Zext << '\n');
1534 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1535 assert(Res->getType() == DestTy);
1536
1537 // Preserve debug values referring to Src if the zext is its last use.
1538 if (auto *SrcOp = dyn_cast<Instruction>(Src))
1539 if (SrcOp->hasOneUse())
1540 replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
1541
1542 uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1543 uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1544
1545 // If the high bits are already filled with zeros, just replace this
1546 // cast with the result.
1548 Res, APInt::getHighBitsSet(DestBitSize, DestBitSize - SrcBitsKept),
1549 &Zext))
1550 return replaceInstUsesWith(Zext, Res);
1551
1552 // We need to emit an AND to clear the high bits.
1553 Constant *C = ConstantInt::get(Res->getType(),
1554 APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
1555 return BinaryOperator::CreateAnd(Res, C);
1556 }
1557
1558 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1559 // types and if the sizes are just right we can convert this into a logical
1560 // 'and' which will be much cheaper than the pair of casts.
1561 if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
1562 // TODO: Subsume this into EvaluateInDifferentType.
1563
1564 // Get the sizes of the types involved. We know that the intermediate type
1565 // will be smaller than A or C, but don't know the relation between A and C.
1566 Value *A = CSrc->getOperand(0);
1567 unsigned SrcSize = A->getType()->getScalarSizeInBits();
1568 unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1569 unsigned DstSize = DestTy->getScalarSizeInBits();
1570 // If we're actually extending zero bits, then if
1571 // SrcSize < DstSize: zext(a & mask)
1572 // SrcSize == DstSize: a & mask
1573 // SrcSize > DstSize: trunc(a) & mask
1574 if (SrcSize < DstSize) {
1575 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1576 Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
1577 Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
1578 return new ZExtInst(And, DestTy);
1579 }
1580
1581 if (SrcSize == DstSize) {
1582 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1583 return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
1584 AndValue));
1585 }
1586 if (SrcSize > DstSize) {
1587 Value *Trunc = Builder.CreateTrunc(A, DestTy);
1588 APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
1589 return BinaryOperator::CreateAnd(Trunc,
1590 ConstantInt::get(Trunc->getType(),
1591 AndValue));
1592 }
1593 }
1594
1595 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1596 return transformZExtICmp(Cmp, Zext);
1597
1598 // zext(trunc(X) & C) -> (X & zext(C)).
1599 Constant *C;
1600 Value *X;
1601 if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
1602 X->getType() == DestTy)
1603 return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));
1604
1605 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1606 Value *And;
1607 if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
1609 X->getType() == DestTy) {
1610 Value *ZC = Builder.CreateZExt(C, DestTy);
1611 return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
1612 }
1613
1614 // If we are truncating, masking, and then zexting back to the original type,
1615 // that's just a mask. This is not handled by canEvaluateZextd if the
1616 // intermediate values have extra uses. This could be generalized further for
1617 // a non-constant mask operand.
1618 // zext (and (trunc X), C) --> and X, (zext C)
1619 if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
1620 X->getType() == DestTy) {
1621 Value *ZextC = Builder.CreateZExt(C, DestTy);
1622 return BinaryOperator::CreateAnd(X, ZextC);
1623 }
1624
1625 if (match(Src, m_VScale())) {
1626 if (Zext.getFunction() &&
1627 Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1628 Attribute Attr =
1629 Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1630 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1631 unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1632 if (Log2_32(*MaxVScale) < TypeWidth)
1633 return replaceInstUsesWith(Zext, Builder.CreateVScale(DestTy));
1634 }
1635 }
1636 }
1637
1638 if (!Zext.hasNonNeg()) {
1639 // If this zero extend is only used by a shift, add nneg flag.
1640 if (Zext.hasOneUse() &&
1641 SrcTy->getScalarSizeInBits() >
1642 Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
1643 match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
1644 Zext.setNonNeg();
1645 return &Zext;
1646 }
1647
1648 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
1649 Zext.setNonNeg();
1650 return &Zext;
1651 }
1652 }
1653
1654 return nullptr;
1655}
1656
1657/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1658Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
1659 SExtInst &Sext) {
1660 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1661 ICmpInst::Predicate Pred = Cmp->getPredicate();
1662
1663 // Don't bother if Op1 isn't of vector or integer type.
1664 if (!Op1->getType()->isIntOrIntVectorTy())
1665 return nullptr;
1666
1667 if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
1668 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1669 Value *Sh = ConstantInt::get(Op0->getType(),
1670 Op0->getType()->getScalarSizeInBits() - 1);
1671 Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1672 if (In->getType() != Sext.getType())
1673 In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
1674
1675 return replaceInstUsesWith(Sext, In);
1676 }
1677
1678 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1679 // If we know that only one bit of the LHS of the icmp can be set and we
1680 // have an equality comparison with zero or a power of 2, we can transform
1681 // the icmp and sext into bitwise/integer operations.
1682 if (Cmp->hasOneUse() &&
1683 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1684 KnownBits Known = computeKnownBits(Op0, &Sext);
1685
1686 APInt KnownZeroMask(~Known.Zero);
1687 if (KnownZeroMask.isPowerOf2()) {
1688 Value *In = Cmp->getOperand(0);
1689
1690 // If the icmp tests for a known zero bit we can constant fold it.
1691 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1692 Value *V = Pred == ICmpInst::ICMP_NE ?
1694 ConstantInt::getNullValue(Sext.getType());
1695 return replaceInstUsesWith(Sext, V);
1696 }
1697
1698 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1699 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1700 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1701 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1702 // Perform a right shift to place the desired bit in the LSB.
1703 if (ShiftAmt)
1704 In = Builder.CreateLShr(In,
1705 ConstantInt::get(In->getType(), ShiftAmt));
1706
1707 // At this point "In" is either 1 or 0. Subtract 1 to turn
1708 // {1, 0} -> {0, -1}.
1709 In = Builder.CreateAdd(In,
1710 ConstantInt::getAllOnesValue(In->getType()),
1711 "sext");
1712 } else {
1713 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1714 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1715 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1716 // Perform a left shift to place the desired bit in the MSB.
1717 if (ShiftAmt)
1718 In = Builder.CreateShl(In,
1719 ConstantInt::get(In->getType(), ShiftAmt));
1720
1721 // Distribute the bit over the whole bit width.
1722 In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1723 KnownZeroMask.getBitWidth() - 1), "sext");
1724 }
1725
1726 if (Sext.getType() == In->getType())
1727 return replaceInstUsesWith(Sext, In);
1728 return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
1729 }
1730 }
1731 }
1732
1733 return nullptr;
1734}
1735
1736/// Return true if we can take the specified value and return it as type Ty
1737/// without inserting any new casts and without changing the value of the common
1738/// low bits. This is used by code that tries to promote integer operations to
1739/// a wider types will allow us to eliminate the extension.
1740///
1741/// This function works on both vectors and scalars.
1742///
1743bool TypeEvaluationHelper::canEvaluateSExtd(Value *V, Type *Ty) {
1744 TypeEvaluationHelper TYH;
1745 return TYH.canEvaluateSExtdImpl(V, Ty) && TYH.allPendingVisited();
1746}
1747
1748bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
1749 return canEvaluate(V, Ty, [this](Value *V, Type *Ty) {
1750 return canEvaluateSExtdPred(V, Ty);
1751 });
1752}
1753
1754bool TypeEvaluationHelper::canEvaluateSExtdPred(Value *V, Type *Ty) {
1755 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1756 "Can't sign extend type to a smaller type");
1757
1758 auto *I = cast<Instruction>(V);
1759 switch (I->getOpcode()) {
1760 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1761 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1762 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1763 return true;
1764 case Instruction::And:
1765 case Instruction::Or:
1766 case Instruction::Xor:
1767 case Instruction::Add:
1768 case Instruction::Sub:
1769 case Instruction::Mul:
1770 // These operators can all arbitrarily be extended if their inputs can.
1771 return canEvaluateSExtdImpl(I->getOperand(0), Ty) &&
1772 canEvaluateSExtdImpl(I->getOperand(1), Ty);
1773
1774 // case Instruction::Shl: TODO
1775 // case Instruction::LShr: TODO
1776
1777 case Instruction::Select:
1778 return canEvaluateSExtdImpl(I->getOperand(1), Ty) &&
1779 canEvaluateSExtdImpl(I->getOperand(2), Ty);
1780
1781 case Instruction::PHI: {
1782 // We can change a phi if we can change all operands. Note that we never
1783 // get into trouble with cyclic PHIs here because canEvaluate handles use
1784 // chain loops.
1785 PHINode *PN = cast<PHINode>(I);
1786 for (Value *IncValue : PN->incoming_values())
1787 if (!canEvaluateSExtdImpl(IncValue, Ty))
1788 return false;
1789 return true;
1790 }
1791 default:
1792 // TODO: Can handle more cases here.
1793 break;
1794 }
1795
1796 return false;
1797}
1798
1800 // If this sign extend is only used by a truncate, let the truncate be
1801 // eliminated before we try to optimize this sext.
1802 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1803 return nullptr;
1804
1805 if (Instruction *I = commonCastTransforms(Sext))
1806 return I;
1807
1808 Value *Src = Sext.getOperand(0);
1809 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1810 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1811 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1812
1813 // If the value being extended is zero or positive, use a zext instead.
1814 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1815 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1816 CI->setNonNeg(true);
1817 return CI;
1818 }
1819
1820 // Try to extend the entire expression tree to the wide destination type.
1821 bool ShouldExtendExpression = true;
1822 Value *TruncSrc = nullptr;
1823 // It is not desirable to extend expression in the trunc + sext pattern when
1824 // destination type is narrower than original (pre-trunc) type.
1825 if (match(Src, m_Trunc(m_Value(TruncSrc))))
1826 if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
1827 ShouldExtendExpression = false;
1828 if (ShouldExtendExpression && shouldChangeType(SrcTy, DestTy) &&
1829 TypeEvaluationHelper::canEvaluateSExtd(Src, DestTy)) {
1830 // Okay, we can transform this! Insert the new expression now.
1831 LLVM_DEBUG(
1832 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1833 " to avoid sign extend: "
1834 << Sext << '\n');
1835 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1836 assert(Res->getType() == DestTy);
1837
1838 // If the high bits are already filled with sign bit, just replace this
1839 // cast with the result.
1840 if (ComputeNumSignBits(Res, &Sext) > DestBitSize - SrcBitSize)
1841 return replaceInstUsesWith(Sext, Res);
1842
1843 // We need to emit a shl + ashr to do the sign extend.
1844 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1845 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1846 ShAmt);
1847 }
1848
1849 Value *X = TruncSrc;
1850 if (X) {
1851 // If the input has more sign bits than bits truncated, then convert
1852 // directly to final type.
1853 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1854 bool HasNSW = cast<TruncInst>(Src)->hasNoSignedWrap();
1855 if (HasNSW || (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)) {
1856 auto *Res = CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1857 if (auto *ResTrunc = dyn_cast<TruncInst>(Res); ResTrunc && HasNSW)
1858 ResTrunc->setHasNoSignedWrap(true);
1859 return Res;
1860 }
1861
1862 // If input is a trunc from the destination type, then convert into shifts.
1863 if (Src->hasOneUse() && X->getType() == DestTy) {
1864 // sext (trunc X) --> ashr (shl X, C), C
1865 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1866 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1867 }
1868
1869 // If we are replacing shifted-in high zero bits with sign bits, convert
1870 // the logic shift to arithmetic shift and eliminate the cast to
1871 // intermediate type:
1872 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1873 Value *Y;
1874 if (Src->hasOneUse() &&
1876 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1877 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1878 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1879 }
1880 }
1881
1882 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1883 return transformSExtICmp(Cmp, Sext);
1884
1885 // If the input is a shl/ashr pair of a same constant, then this is a sign
1886 // extension from a smaller value. If we could trust arbitrary bitwidth
1887 // integers, we could turn this into a truncate to the smaller bit and then
1888 // use a sext for the whole extension. Since we don't, look deeper and check
1889 // for a truncate. If the source and dest are the same type, eliminate the
1890 // trunc and extend and just do shifts. For example, turn:
1891 // %a = trunc i32 %i to i8
1892 // %b = shl i8 %a, C
1893 // %c = ashr i8 %b, C
1894 // %d = sext i8 %c to i32
1895 // into:
1896 // %a = shl i32 %i, 32-(8-C)
1897 // %d = ashr i32 %a, 32-(8-C)
1898 Value *A = nullptr;
1899 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1900 Constant *BA = nullptr, *CA = nullptr;
1901 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1902 m_ImmConstant(CA))) &&
1903 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1904 Constant *WideCurrShAmt =
1905 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1906 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1907 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1908 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1909 Constant *NewShAmt = ConstantExpr::getSub(
1910 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1911 NumLowbitsLeft);
1912 NewShAmt =
1914 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1915 return BinaryOperator::CreateAShr(A, NewShAmt);
1916 }
1917
1918 // Splatting a bit of constant-index across a value:
1919 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1920 // If the dest type is different, use a cast (adjust use check).
1921 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1922 m_SpecificInt(SrcBitSize - 1))))) {
1923 Type *XTy = X->getType();
1924 unsigned XBitSize = XTy->getScalarSizeInBits();
1925 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1926 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1927 if (XTy == DestTy)
1928 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1929 AshrAmtC);
1930 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1931 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1932 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1933 }
1934 }
1935
1936 if (match(Src, m_VScale())) {
1937 if (Sext.getFunction() &&
1938 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1939 Attribute Attr =
1940 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1941 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1942 if (Log2_32(*MaxVScale) < (SrcBitSize - 1))
1943 return replaceInstUsesWith(Sext, Builder.CreateVScale(DestTy));
1944 }
1945 }
1946
1947 // sext(scmp(x, y)) -> scmp(x, y) with a wider result type.
1948 // sext(ucmp(x, y)) -> ucmp(x, y) with a wider result type.
1949 // scmp/ucmp return only -1, 0, or 1, which sign-extend correctly to any
1950 // wider integer type, so we can sink the extension into the intrinsic.
1951 if (auto *II = dyn_cast<IntrinsicInst>(Src)) {
1952 Intrinsic::ID IID = II->getIntrinsicID();
1953 if ((IID == Intrinsic::scmp || IID == Intrinsic::ucmp) && II->hasOneUse())
1954 return replaceInstUsesWith(
1955 Sext, Builder.CreateIntrinsic(
1956 DestTy, IID, {II->getArgOperand(0), II->getArgOperand(1)}));
1957 }
1958
1959 return nullptr;
1960}
1961
/// Return true if the given floating-point value can be converted to the
/// specified FP semantics without losing information (i.e. it would round-trip
/// exactly through the narrower type).
static bool fitsInFPType(APFloat F, const fltSemantics &Sem) {
  bool losesInfo;
  // convert() reports whether any precision/range was lost; the status code
  // itself is irrelevant here.
  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
  return !losesInfo;
}
1969
1971 bool PreferBFloat) {
1972 // See if the value can be truncated to bfloat and then reextended.
1973 if (PreferBFloat && fitsInFPType(F, APFloat::BFloat()))
1974 return Type::getBFloatTy(Ctx);
1975 // See if the value can be truncated to half and then reextended.
1976 if (!PreferBFloat && fitsInFPType(F, APFloat::IEEEhalf()))
1977 return Type::getHalfTy(Ctx);
1978 // See if the value can be truncated to float and then reextended.
1980 return Type::getFloatTy(Ctx);
1981 if (&F.getSemantics() == &APFloat::IEEEdouble())
1982 return nullptr; // Won't shrink.
1983 // See if the value can be truncated to double and then reextended.
1985 return Type::getDoubleTy(Ctx);
1986 // Don't try to shrink to various long double types.
1987 return nullptr;
1988}
1989
1990static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
1991 Type *Ty = CFP->getType();
1992 if (Ty->getScalarType()->isPPC_FP128Ty())
1993 return nullptr; // No constant folding of this.
1994
1995 Type *ShrinkTy =
1996 shrinkFPConstant(CFP->getContext(), CFP->getValueAPF(), PreferBFloat);
1997 if (ShrinkTy)
1998 if (auto *VecTy = dyn_cast<VectorType>(Ty))
1999 ShrinkTy = VectorType::get(ShrinkTy, VecTy);
2000
2001 return ShrinkTy;
2002}
2003
2004// Determine if this is a vector of ConstantFPs and if so, return the minimal
2005// type we can safely truncate all elements to.
2006static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
2007 auto *CV = dyn_cast<Constant>(V);
2008 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
2009 if (!CV || !CVVTy)
2010 return nullptr;
2011
2012 Type *MinType = nullptr;
2013
2014 unsigned NumElts = CVVTy->getNumElements();
2015
2016 // For fixed-width vectors we find the minimal type by looking
2017 // through the constant values of the vector.
2018 for (unsigned i = 0; i != NumElts; ++i) {
2019 if (isa<UndefValue>(CV->getAggregateElement(i)))
2020 continue;
2021
2022 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
2023 if (!CFP)
2024 return nullptr;
2025
2026 Type *T = shrinkFPConstant(CFP, PreferBFloat);
2027 if (!T)
2028 return nullptr;
2029
2030 // If we haven't found a type yet or this type has a larger mantissa than
2031 // our previous type, this is our new minimal type.
2032 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
2033 MinType = T;
2034 }
2035
2036 // Make a vector type from the minimal type.
2037 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
2038}
2039
2040/// Find the minimum FP type we can safely truncate to.
2041static Type *getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC) {
2042 if (auto *FPExt = dyn_cast<FPExtInst>(V))
2043 return FPExt->getOperand(0)->getType();
2044
2045 Value *Src;
2046 if (match(V, m_IToFP(m_Value(Src))) &&
2047 IC.canBeCastedExactlyIntToFP(Src, PreferredTy, isa<SIToFPInst>(V),
2049 return PreferredTy;
2050
2051 bool PreferBFloat = PreferredTy->getScalarType()->isBFloatTy();
2052 // If this value is a constant, return the constant in the smallest FP type
2053 // that can accurately represent it. This allows us to turn
2054 // (float)((double)X+2.0) into x+2.0f.
2055 if (auto *CFP = dyn_cast<ConstantFP>(V))
2056 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
2057 return T;
2058
2059 // Try to shrink scalable and fixed splat vectors.
2060 if (auto *FPC = dyn_cast<Constant>(V))
2061 if (auto *VTy = dyn_cast<VectorType>(V->getType()))
2062 if (auto *Splat = dyn_cast_or_null<ConstantFP>(FPC->getSplatValue()))
2063 if (Type *T = shrinkFPConstant(Splat, PreferBFloat))
2064 return VectorType::get(T, VTy);
2065
2066 // Try to shrink a vector of FP constants. This returns nullptr on scalable
2067 // vectors
2068 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
2069 return T;
2070
2071 return V->getType();
2072}
2073
2075 bool IsSigned,
2076 const Instruction *CxtI) const {
2077 Type *SrcTy = V->getType();
2078 assert(SrcTy->isIntOrIntVectorTy() && "Expected an integer type");
2079 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
2080 int DestNumSigBits = FPTy->getFPMantissaWidth();
2081
2082 // Easy case - if the source integer type has less bits than the FP mantissa,
2083 // then the cast must be exact.
2084 if (SrcSize <= DestNumSigBits)
2085 return true;
2086
2087 // Cast from FP to integer and back to FP is independent of the intermediate
2088 // integer width because of poison on overflow.
2089 Value *F;
2090 if (match(V, m_FPToI(m_Value(F)))) {
2091 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
2092 // potential rounding of negative FP input values.
2093 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
2094 if (!IsSigned && match(V, m_FPToSI(m_Value())))
2095 SrcNumSigBits++;
2096
2097 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
2098 // significant bits than the destination (and make sure neither type is
2099 // weird -- ppc_fp128).
2100 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
2101 SrcNumSigBits <= DestNumSigBits)
2102 return true;
2103 }
2104
2105 // Try harder to find if the source integer type has less significant bits.
2106 // Compute number of sign bits or determine trailing zeros.
2107 KnownBits SrcKnown = computeKnownBits(V, CxtI);
2108 int SigBits = (int)SrcTy->getScalarSizeInBits() -
2109 SrcKnown.countMinLeadingZeros() -
2110 SrcKnown.countMinTrailingZeros();
2111 if (SigBits <= DestNumSigBits)
2112 return true;
2113
2114 // For sitofp, the sign maps to the FP sign bit, so only magnitude bits
2115 // (BitWidth - NumSignBits) consume mantissa.
2116 if (IsSigned) {
2117 SigBits = (int)SrcTy->getScalarSizeInBits() - ComputeNumSignBits(V, CxtI);
2118 if (SigBits <= DestNumSigBits)
2119 return true;
2120 }
2121
2122 return false;
2123}
2124
2126 CastInst::CastOps Opcode = I.getOpcode();
2127 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
2128 "Unexpected cast");
2129 Value *Src = I.getOperand(0);
2130 Type *FPTy = I.getType();
2131 return canBeCastedExactlyIntToFP(Src, FPTy, Opcode == CastInst::SIToFP, &I);
2132}
2133
2136 return I;
2137
2138 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
2139 // simplify this expression to avoid one or more of the trunc/extend
2140 // operations if we can do so without changing the numerical results.
2141 //
2142 // The exact manner in which the widths of the operands interact to limit
2143 // what we can and cannot do safely varies from operation to operation, and
2144 // is explained below in the various case statements.
2145 Type *Ty = FPT.getType();
2146 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
2147 if (BO && BO->hasOneUse()) {
2148 Type *LHSMinType = getMinimumFPType(BO->getOperand(0), Ty, *this);
2149 Type *RHSMinType = getMinimumFPType(BO->getOperand(1), Ty, *this);
2150 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
2151 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
2152 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
2153 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
2154 unsigned DstWidth = Ty->getFPMantissaWidth();
2155 switch (BO->getOpcode()) {
2156 default: break;
2157 case Instruction::FAdd:
2158 case Instruction::FSub:
2159 // For addition and subtraction, the infinitely precise result can
2160 // essentially be arbitrarily wide; proving that double rounding
2161 // will not occur because the result of OpI is exact (as we will for
2162 // FMul, for example) is hopeless. However, we *can* nonetheless
2163 // frequently know that double rounding cannot occur (or that it is
2164 // innocuous) by taking advantage of the specific structure of
2165 // infinitely-precise results that admit double rounding.
2166 //
2167 // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
2168 // to represent both sources, we can guarantee that the double
2169 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
2170 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
2171 // for proof of this fact).
2172 //
2173 // Note: Figueroa does not consider the case where DstFormat !=
2174 // SrcFormat. It's possible (likely even!) that this analysis
2175 // could be tightened for those cases, but they are rare (the main
2176 // case of interest here is (float)((double)float + float)).
2177 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
2178 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2179 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2180 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
2181 RI->copyFastMathFlags(BO);
2182 return RI;
2183 }
2184 break;
2185 case Instruction::FMul:
2186 // For multiplication, the infinitely precise result has at most
2187 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
2188 // that such a value can be exactly represented, then no double
2189 // rounding can possibly occur; we can safely perform the operation
2190 // in the destination format if it can represent both sources.
2191 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
2192 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2193 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2194 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
2195 }
2196 break;
2197 case Instruction::FDiv:
2198 // For division, we use again use the bound from Figueroa's
2199 // dissertation. I am entirely certain that this bound can be
2200 // tightened in the unbalanced operand case by an analysis based on
2201 // the diophantine rational approximation bound, but the well-known
2202 // condition used here is a good conservative first pass.
2203 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
2204 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
2205 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2206 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2207 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
2208 }
2209 break;
2210 case Instruction::FRem: {
2211 // Remainder is straightforward. Remainder is always exact, so the
2212 // type of OpI doesn't enter into things at all. We simply evaluate
2213 // in whichever source type is larger, then convert to the
2214 // destination type.
2215 if (SrcWidth == OpWidth)
2216 break;
2217 Value *LHS, *RHS;
2218 if (LHSWidth == SrcWidth) {
2219 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
2220 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
2221 } else {
2222 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
2223 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
2224 }
2225
2226 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
2227 return CastInst::CreateFPCast(ExactResult, Ty);
2228 }
2229 }
2230 }
2231
2232 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
2233 Value *X;
2235 if (Op && Op->hasOneUse()) {
2236 FastMathFlags FMF = FPT.getFastMathFlags();
2237 if (auto *FPMO = dyn_cast<FPMathOperator>(Op))
2238 FMF &= FPMO->getFastMathFlags();
2239
2240 if (match(Op, m_FNeg(m_Value(X)))) {
2241 Value *InnerTrunc = Builder.CreateFPTruncFMF(X, Ty, FMF);
2242 Value *Neg = Builder.CreateFNegFMF(InnerTrunc, FMF);
2243 return replaceInstUsesWith(FPT, Neg);
2244 }
2245
2246 // If we are truncating a select that has an extended operand, we can
2247 // narrow the other operand and do the select as a narrow op.
2248 Value *Cond, *X, *Y;
2250 X->getType() == Ty) {
2251 // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
2252 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2253 Value *Sel =
2254 Builder.CreateSelectFMF(Cond, X, NarrowY, FMF, "narrow.sel", Op);
2255 return replaceInstUsesWith(FPT, Sel);
2256 }
2258 X->getType() == Ty) {
2259 // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
2260 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2261 Value *Sel =
2262 Builder.CreateSelectFMF(Cond, NarrowY, X, FMF, "narrow.sel", Op);
2263 return replaceInstUsesWith(FPT, Sel);
2264 }
2265 }
2266
2267 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
2268 switch (II->getIntrinsicID()) {
2269 default: break;
2270 case Intrinsic::ceil:
2271 case Intrinsic::fabs:
2272 case Intrinsic::floor:
2273 case Intrinsic::nearbyint:
2274 case Intrinsic::rint:
2275 case Intrinsic::round:
2276 case Intrinsic::roundeven:
2277 case Intrinsic::trunc: {
2278 Value *Src = II->getArgOperand(0);
2279 if (!Src->hasOneUse())
2280 break;
2281
2282 // Except for fabs, this transformation requires the input of the unary FP
2283 // operation to be itself an fpext from the type to which we're
2284 // truncating.
2285 if (II->getIntrinsicID() != Intrinsic::fabs) {
2286 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
2287 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
2288 break;
2289 }
2290
2291 // Do unary FP operation on smaller type.
2292 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
2293 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
2295 FPT.getModule(), II->getIntrinsicID(), Ty);
2297 II->getOperandBundlesAsDefs(OpBundles);
2298 CallInst *NewCI =
2299 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
2300 // A normal value may be converted to an infinity. It means that we cannot
2301 // propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
2302 NewCI->copyFastMathFlags(&FPT);
2303 return NewCI;
2304 }
2305 }
2306 }
2307
2308 if (Instruction *I = shrinkInsertElt(FPT, Builder))
2309 return I;
2310
2311 Value *Src = FPT.getOperand(0);
2312 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2313 auto *FPCast = cast<CastInst>(Src);
2314 if (isKnownExactCastIntToFP(*FPCast))
2315 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2316 }
2317
2318 return nullptr;
2319}
2320
2322 // If the source operand is a cast from integer to FP and known exact, then
2323 // cast the integer operand directly to the destination type.
2324 Type *Ty = FPExt.getType();
2325 Value *Src = FPExt.getOperand(0);
2326 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2327 auto *FPCast = cast<CastInst>(Src);
2328 if (isKnownExactCastIntToFP(*FPCast))
2329 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2330 }
2331
2332 return commonCastTransforms(FPExt);
2333}
2334
2335/// fpto{s/u}i[.sat]({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
2336/// This is safe if the intermediate type has enough bits in its mantissa to
2337/// accurately represent all values of X. For example, this won't work with
2338/// i64 -> float -> i64.
2339template <typename FPToIntTy>
2341 constexpr bool IsSaturating = std::is_same_v<FPToIntTy, IntrinsicInst>;
2342
2343 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
2344 return nullptr;
2345
2346 auto *OpI = cast<CastInst>(FI.getOperand(0));
2347 Value *X = OpI->getOperand(0);
2348 Type *XType = X->getType();
2349 Type *DestType = FI.getType();
2350 bool IsInputSigned = isa<SIToFPInst>(OpI);
2351
2352 bool IsOutputSigned;
2353 if constexpr (IsSaturating)
2354 IsOutputSigned = FI.getIntrinsicID() == Intrinsic::fptosi_sat;
2355 else
2356 IsOutputSigned = isa<FPToSIInst>(FI);
2357
2358 // Since we can assume the conversion won't overflow, our decision as to
2359 // whether the input will fit in the float should depend on the minimum
2360 // of the input range and output range.
2361
2362 // This means this is also safe for a signed input and unsigned output, since
2363 // a negative input would lead to undefined behavior.
2364 if (!isKnownExactCastIntToFP(*OpI)) {
2365 if constexpr (!IsSaturating) {
2366 // The first cast may not round exactly based on the source integer width
2367 // and FP width, but the overflow UB rules can still allow this to fold.
2368 // If the destination type is narrow, that means the intermediate FP value
2369 // must be large enough to hold the source value exactly.
2370 //
2371 // For example, (uint8_t)((float)(uint32_t 16777217) is UB.
2372 int OutputSize = (int)DestType->getScalarSizeInBits();
2373 if (OutputSize > OpI->getType()->getFPMantissaWidth())
2374 return nullptr;
2375 } else {
2376 // Sat intrinsics produce a defined saturated value on overflow, so
2377 // the UB-based shortcut is invalid. Require exactness.
2378 return nullptr;
2379 }
2380 }
2381
2382 unsigned SrcWidth = XType->getScalarSizeInBits();
2383 unsigned DestWidth = DestType->getScalarSizeInBits();
2384
2385 if constexpr (IsSaturating) {
2386 // TODO: cross-sign and narrowing cases could be handled with range
2387 // analysis to prove the source fits in the destination.
2388 if (IsInputSigned != IsOutputSigned || DestWidth < SrcWidth)
2389 return nullptr;
2390 }
2391
2392 if (DestWidth > SrcWidth) {
2393 if (IsInputSigned && IsOutputSigned)
2394 return new SExtInst(X, DestType);
2395 return new ZExtInst(X, DestType);
2396 }
2397 if (DestWidth < SrcWidth)
2398 return new TruncInst(X, DestType);
2399
2400 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2401 return replaceInstUsesWith(FI, X);
2402}
2403
2405template Instruction *
2407
2409 // fpto{u/s}i non-norm --> 0
2410 FPClassTest Mask =
2411 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2413 FI.getOperand(0), Mask, IC.getSimplifyQuery().getWithInstruction(&FI));
2414 if (FPClass.isKnownNever(Mask))
2416
2417 return nullptr;
2418}
2419
2421 if (Instruction *I = foldItoFPtoI(FI))
2422 return I;
2423
2424 if (Instruction *I = foldFPtoI(FI, *this))
2425 return I;
2426
2427 return commonCastTransforms(FI);
2428}
2429
2431 if (Instruction *I = foldItoFPtoI(FI))
2432 return I;
2433
2434 if (Instruction *I = foldFPtoI(FI, *this))
2435 return I;
2436
2437 return commonCastTransforms(FI);
2438}
2439
2441 if (Instruction *R = commonCastTransforms(CI))
2442 return R;
2443 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
2444 CI.setNonNeg();
2445 return &CI;
2446 }
2447 return nullptr;
2448}
2449
2451 if (Instruction *R = commonCastTransforms(CI))
2452 return R;
2453 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2454 auto *UI =
2455 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2456 UI->setNonNeg(true);
2457 return UI;
2458 }
2459 return nullptr;
2460}
2461
// NOTE(review): the visitIntToPtr signature (line 2462) and several interior
// lines (2481, 2488, 2490, 2496 -- the match patterns and conditions) were
// dropped by the extraction; the gaps in the embedded numbering below mark
// them. Confirm the missing text against upstream before editing.
2463 // If the source integer type is not the intptr_t type for this target, do a
2464 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2465 // cast to be exposed to other transforms.
2466 unsigned AS = CI.getAddressSpace();
2467 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2468 DL.getPointerSizeInBits(AS)) {
2469 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2470 DL.getIntPtrType(CI.getContext(), AS));
2471 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2472 return new IntToPtrInst(P, CI.getType());
2473 }
2474
2475 // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
2476 // (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
2477 // value.
2478 Value *Base;
2479 Value *Offset;
// A user "uses the pointer as an integer" directly (dropped condition at
// line 2481) or via a single-use phi whose only user is an icmp/ptrtoint.
2480 auto UsesPointerAsInt = [](User *U) {
2482 return true;
2483 if (auto *P = dyn_cast<PHINode>(U))
2484 return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(*P->user_begin());
2485 return false;
2486 };
2487 if (match(CI.getOperand(0),
2489 m_Value(Offset)))) &&
2491 Base->getType()->getPointerAddressSpace() &&
2492 all_of(CI.users(), UsesPointerAsInt)) {
2493 return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
2494 }
2495
2497 return I;
2498
2499 return nullptr;
2500}
2501
// NOTE(review): the signature line (2502, presumably
// InstCombinerImpl::foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr) judging
// by the call sites below) and the GEPs container declaration (line 2505)
// are missing from this extract -- confirm against upstream.
// Rewrites ptrtoint/ptrtoaddr of a one-use GEP chain over a base pointer
// with a known integer value as pure integer arithmetic.
2503 // Look through chain of one-use GEPs.
2504 Type *PtrTy = Ptr->getType();
2506 while (true) {
2507 auto *GEP = dyn_cast<GEPOperator>(Ptr);
2508 if (!GEP || !GEP->hasOneUse())
2509 break;
2510 GEPs.push_back(GEP);
2511 Ptr = GEP->getPointerOperand();
2512 }
2513
2514 // Don't handle case where GEP converts from pointer to vector.
2515 if (GEPs.empty() || PtrTy != Ptr->getType())
2516 return nullptr;
2517
2518 // Check whether we know the integer value of the base pointer.
2519 Value *Res;
2520 Type *IdxTy = DL.getIndexType(PtrTy);
2521 if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
2522 Res->getType() == IntTy && IntTy == IdxTy) {
2523 // pass
2524 } else if (isa<ConstantPointerNull>(Ptr)) {
2525 Res = Constant::getNullValue(IdxTy);
2526 } else {
2527 return nullptr;
2528 }
2529
2530 // Perform the entire operation on integers instead.
// Walk outermost-to-innermost (reverse of collection order), accumulating
// each GEP's offset; nuw on the add is only claimed when the GEP had it.
2531 for (GEPOperator *GEP : reverse(GEPs)) {
2532 Value *Offset = EmitGEPOffset(GEP);
2533 Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
2534 }
2535 return Builder.CreateZExtOrTrunc(Res, IntTy);
2536}
2537
// NOTE(review): the visitPtrToInt signature (line 2538), the declaration of
// SrcOp (line 2542, presumably the cast's pointer operand), and the ptrmask /
// insertelement match lines (2559, 2568) were dropped by the extraction --
// confirm against upstream before editing.
2539 // If the destination integer type is not the intptr_t type for this target,
2540 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2541 // to be exposed to other transforms.
2543 Type *SrcTy = SrcOp->getType();
2544 Type *Ty = CI.getType();
2545 unsigned AS = CI.getPointerAddressSpace();
2546 unsigned TySize = Ty->getScalarSizeInBits();
2547 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2548 if (TySize != PtrSize) {
2549 Type *IntPtrTy =
2550 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2551 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2552 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2553 }
2554
2555 // (ptrtoint (ptrmask P, M))
2556 // -> (and (ptrtoint P), M)
2557 // This is generally beneficial as `and` is better supported than `ptrmask`.
2558 Value *Ptr, *Mask;
2560 m_Value(Mask)))) &&
2561 Mask->getType() == Ty)
2562 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2563
// Try to fold away a one-use GEP chain by doing the math on integers.
2564 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2565 return replaceInstUsesWith(CI, V);
2566
2567 Value *Vec, *Scalar, *Index;
2569 m_Value(Scalar), m_Value(Index)))) &&
2570 Vec->getType() == Ty) {
2571 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2572 // Convert the scalar to int followed by insert to eliminate one cast:
2573 // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2574 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2575 return InsertElementInst::Create(Vec, NewCast, Index);
2576 }
2577
2578 return commonCastTransforms(CI);
2579}
2580
// NOTE(review): signature (presumably InstCombinerImpl::visitPtrToAddr) and
// the SrcOp declaration plus the ptrmask match line (2581-2582, 2589) are
// missing from this extract. Mirrors the ptrtoint folds above.
2583 Type *Ty = CI.getType();
2584
2585 // (ptrtoaddr (ptrmask P, M))
2586 // -> (and (ptrtoaddr P), M)
2587 // This is generally beneficial as `and` is better supported than `ptrmask`.
2588 Value *Ptr, *Mask;
2590 m_Value(Mask)))) &&
2591 Mask->getType() == Ty)
2592 return BinaryOperator::CreateAnd(Builder.CreatePtrToAddr(Ptr), Mask);
2593
2594 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2595 return replaceInstUsesWith(CI, V);
2596
2597 // FIXME: Implement variants of ptrtoint folds.
2598 return commonCastTransforms(CI);
2599}
2600
2601/// This input value (which is known to have vector type) is being zero extended
2602/// or truncated to the specified vector type. Since the zext/trunc is done
2603/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2604/// endianness will impact which end of the vector that is extended or
2605/// truncated.
2606///
2607/// A vector is always stored with index 0 at the lowest address, which
2608/// corresponds to the most significant bits for a big endian stored integer and
2609/// the least significant bits for little endian. A trunc/zext of an integer
2610/// impacts the big end of the integer. Thus, we need to add/remove elements at
2611/// the front of the vector for big endian targets, and the back of the vector
2612/// for little endian targets.
2613///
2614/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2615///
2616/// The source and destination vector types may have different element types.
2617static Instruction *
// NOTE(review): line 2618 (the function name and first parameters, per the
// footer index: optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
// VectorType *DestTy, ...)) was dropped by the extraction.
2619 InstCombinerImpl &IC) {
2620 // We can only do this optimization if the output is a multiple of the input
2621 // element size, or the input is a multiple of the output element size.
2622 // Convert the input type to have the same element type as the output.
2623 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2624
2625 if (SrcTy->getElementType() != DestTy->getElementType()) {
2626 // The input types don't need to be identical, but for now they must be the
2627 // same size. There is no specific reason we couldn't handle things like
2628 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2629 // there yet.
2630 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2631 DestTy->getElementType()->getPrimitiveSizeInBits())
2632 return nullptr;
2633
2634 SrcTy =
2635 FixedVectorType::get(DestTy->getElementType(),
2636 cast<FixedVectorType>(SrcTy)->getNumElements());
2637 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2638 }
2639
2640 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2641 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2642 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2643
2644 assert(SrcElts != DestElts && "Element counts should be different.");
2645
2646 // Now that the element types match, get the shuffle mask and RHS of the
2647 // shuffle to use, which depends on whether we're increasing or decreasing the
2648 // size of the input.
// Identity mask 0..SrcElts-1 as the starting point for both directions.
2649 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2650 ArrayRef<int> ShuffleMask;
2651 Value *V2;
2652
2653 if (SrcElts > DestElts) {
2654 // If we're shrinking the number of elements (rewriting an integer
2655 // truncate), just shuffle in the elements corresponding to the least
2656 // significant bits from the input and use poison as the second shuffle
2657 // input.
2658 V2 = PoisonValue::get(SrcTy);
2659 // Make sure the shuffle mask selects the "least significant bits" by
2660 // keeping elements from back of the src vector for big endian, and from the
2661 // front for little endian.
2662 ShuffleMask = ShuffleMaskStorage;
2663 if (IsBigEndian)
2664 ShuffleMask = ShuffleMask.take_back(DestElts);
2665 else
2666 ShuffleMask = ShuffleMask.take_front(DestElts);
2667 } else {
2668 // If we're increasing the number of elements (rewriting an integer zext),
2669 // shuffle in all of the elements from InVal. Fill the rest of the result
2670 // elements with zeros from a constant zero.
2671 V2 = Constant::getNullValue(SrcTy);
2672 // Use first elt from V2 when indicating zero in the shuffle mask.
2673 uint32_t NullElt = SrcElts;
2674 // Extend with null values in the "most significant bits" by adding elements
2675 // in front of the src vector for big endian, and at the back for little
2676 // endian.
2677 unsigned DeltaElts = DestElts - SrcElts;
2678 if (IsBigEndian)
2679 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2680 else
2681 ShuffleMaskStorage.append(DeltaElts, NullElt);
2682 ShuffleMask = ShuffleMaskStorage;
2683 }
2684
2685 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2686}
2687
2688static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2689 return Value % Ty->getPrimitiveSizeInBits() == 0;
2690}
2691
2692static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2693 return Value / Ty->getPrimitiveSizeInBits();
2694}
2695
2696/// V is a value which is inserted into a vector of VecEltTy.
2697/// Look through the value to see if we can decompose it into
2698/// insertions into the vector. See the example in the comment for
2699/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2700/// The type of V is always a non-zero multiple of VecEltTy's size.
2701/// Shift is the number of bits between the lsb of V and the lsb of
2702/// the vector.
2703///
2704/// This returns false if the pattern can't be matched or true if it can,
2705/// filling in Elements with the elements found here.
// NOTE(review): interior lines 2743, 2756, 2771 and 2781 (a recursive call
// head, the constant-fold call producing Piece, the dyn_cast to Instruction,
// and a size check) were dropped by the extraction -- confirm upstream.
2706static bool collectInsertionElements(Value *V, unsigned Shift,
2707 SmallVectorImpl<Value *> &Elements,
2708 Type *VecEltTy, bool isBigEndian) {
2709 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2710 "Shift should be a multiple of the element type size");
2711
2712 // Undef values never contribute useful bits to the result.
2713 if (isa<UndefValue>(V)) return true;
2714
2715 // If we got down to a value of the right type, we win, try inserting into the
2716 // right element.
2717 if (V->getType() == VecEltTy) {
2718 // Inserting null doesn't actually insert any elements.
2719 if (Constant *C = dyn_cast<Constant>(V))
2720 if (C->isNullValue())
2721 return true;
2722
2723 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
// Big-endian layout mirrors the element order relative to the bit offset.
2724 if (isBigEndian)
2725 ElementIndex = Elements.size() - ElementIndex - 1;
2726
2727 // Fail if multiple elements are inserted into this slot.
2728 if (Elements[ElementIndex])
2729 return false;
2730
2731 Elements[ElementIndex] = V;
2732 return true;
2733 }
2734
2735 if (Constant *C = dyn_cast<Constant>(V)) {
2736 // Figure out the # elements this provides, and bitcast it or slice it up
2737 // as required.
2738 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2739 VecEltTy);
2740 // If the constant is the size of a vector element, we just need to bitcast
2741 // it to the right type so it gets properly inserted.
2742 if (NumElts == 1)
2744 Shift, Elements, VecEltTy, isBigEndian);
2745
2746 // Okay, this is a constant that covers multiple elements. Slice it up into
2747 // pieces and insert each element-sized piece into the vector.
2748 if (!isa<IntegerType>(C->getType()))
2749 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2750 C->getType()->getPrimitiveSizeInBits()));
2751 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2752 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2753
2754 for (unsigned i = 0; i != NumElts; ++i) {
2755 unsigned ShiftI = i * ElementSize;
2757 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2758 if (!Piece)
2759 return false;
2760
2761 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2762 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2763 isBigEndian))
2764 return false;
2765 }
2766 return true;
2767 }
2768
// Non-constant, non-element-typed values are only followed through a
// single use so the rewrite does not duplicate work.
2769 if (!V->hasOneUse()) return false;
2770
2772 if (!I) return false;
2773 switch (I->getOpcode()) {
2774 default: return false; // Unhandled case.
2775 case Instruction::BitCast:
2776 if (I->getOperand(0)->getType()->isVectorTy())
2777 return false;
2778 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2779 isBigEndian);
2780 case Instruction::ZExt:
2782 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2783 VecEltTy))
2784 return false;
2785 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2786 isBigEndian);
2787 case Instruction::Or:
// Both sides of an 'or' may each contribute disjoint elements.
2788 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2789 isBigEndian) &&
2790 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2791 isBigEndian);
2792 case Instruction::Shl: {
2793 // Must be shifting by a constant that is a multiple of the element size.
2794 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2795 if (!CI) return false;
2796 Shift += CI->getZExtValue();
2797 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2798 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2799 isBigEndian);
2800 }
2801
2802 }
2803}
2804
2805
2806/// If the input is an 'or' instruction, we may be doing shifts and ors to
2807/// assemble the elements of the vector manually.
2808/// Try to rip the code out and replace it with insertelements. This is to
2809/// optimize code like this:
2810///
2811/// %tmp37 = bitcast float %inc to i32
2812/// %tmp38 = zext i32 %tmp37 to i64
2813/// %tmp31 = bitcast float %inc5 to i32
2814/// %tmp32 = zext i32 %tmp31 to i64
2815/// %tmp33 = shl i64 %tmp32, 32
2816/// %ins35 = or i64 %tmp33, %tmp38
2817/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2818///
2819/// Into two insertelements that do "buildvector{%inc, %inc5}".
// NOTE(review): line 2820 (the signature, per the footer index:
// optimizeIntegerToVectorInsertions(BitCastInst &CI, ...)) was dropped by
// the extraction.
2821 InstCombinerImpl &IC) {
2822 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2823 Value *IntInput = CI.getOperand(0);
2824
2825 // if the int input is just an undef value do not try to optimize to vector
2826 // insertions as it will prevent undef propagation
2827 if (isa<UndefValue>(IntInput))
2828 return nullptr;
2829
// One slot per destination element; collectInsertionElements fills them.
2830 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2831 if (!collectInsertionElements(IntInput, 0, Elements,
2832 DestVecTy->getElementType(),
2833 IC.getDataLayout().isBigEndian()))
2834 return nullptr;
2835
2836 // If we succeeded, we know that all of the element are specified by Elements
2837 // or are zero if Elements has a null entry. Recast this as a set of
2838 // insertions.
2839 Value *Result = Constant::getNullValue(CI.getType());
2840 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2841 if (!Elements[i]) continue; // Unset element.
2842
2843 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2844 IC.Builder.getInt32(i));
2845 }
2846
2847 return Result;
2848}
2849
2850/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2851/// vector followed by extract element. The backend tends to handle bitcasts of
2852/// vectors better than bitcasts of scalars because vector registers are
2853/// usually not type-specific like scalar integer or scalar floating-point.
// NOTE(review): line 2854 (the signature head, per the footer index:
// canonicalizeBitCastExtElt(BitCastInst &BitCast, ...)) was dropped by the
// extraction.
2855 InstCombinerImpl &IC) {
2856 Value *VecOp, *Index;
2857 if (!match(BitCast.getOperand(0),
2858 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2859 return nullptr;
2860
2861 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2862 // type to extract from.
2863 Type *DestType = BitCast.getType();
2864 VectorType *VecType = cast<VectorType>(VecOp->getType());
2865 if (VectorType::isValidElementType(DestType)) {
2866 auto *NewVecType = VectorType::get(DestType, VecType);
2867 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2868 return ExtractElementInst::Create(NewBC, Index);
2869 }
2870
2871 // Only solve DestType is vector to avoid inverse transform in visitBitCast.
2872 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2873 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2874 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2875 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2876
2877 return nullptr;
2878}
2879
2880/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
// NOTE(review): line 2881 (the signature head, per the footer index:
// foldBitCastBitwiseLogic(BitCastInst &BitCast, ...)) and line 2900 (the
// second operand match inside the FP branch) were dropped by the extraction.
2882 InstCombiner::BuilderTy &Builder) {
2883 Type *DestTy = BitCast.getType();
2884 BinaryOperator *BO;
2885
// Only one-use and/or/xor feeding the bitcast are candidates.
2886 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2887 !BO->isBitwiseLogicOp())
2888 return nullptr;
2889
2890 // FIXME: This transform is restricted to vector types to avoid backend
2891 // problems caused by creating potentially illegal operations. If a fix-up is
2892 // added to handle that situation, we can remove this check.
2893 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2894 return nullptr;
2895
2896 if (DestTy->isFPOrFPVectorTy()) {
2897 Value *X, *Y;
2898 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2899 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2901 if (X->getType()->isFPOrFPVectorTy() &&
2902 Y->getType()->isIntOrIntVectorTy()) {
2903 Value *CastedOp =
2904 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2905 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2906 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2907 }
2908 if (X->getType()->isIntOrIntVectorTy() &&
2909 Y->getType()->isFPOrFPVectorTy()) {
2910 Value *CastedOp =
2911 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2912 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2913 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2914 }
2915 }
2916 return nullptr;
2917 }
2918
2919 if (!DestTy->isIntOrIntVectorTy())
2920 return nullptr;
2921
2922 Value *X;
2923 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2924 X->getType() == DestTy && !isa<Constant>(X)) {
2925 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2926 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2927 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2928 }
2929
2930 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2931 X->getType() == DestTy && !isa<Constant>(X)) {
2932 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2933 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2934 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2935 }
2936
2937 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2938 // constant. This eases recognition of special constants for later ops.
2939 // Example:
2940 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2941 Constant *C;
2942 if (match(BO->getOperand(1), m_Constant(C))) {
2943 // bitcast (logic X, C) --> logic (bitcast X, C')
2944 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2945 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2946 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2947 }
2948
2949 return nullptr;
2950}
2951
2952/// Change the type of a select if we can eliminate a bitcast.
// NOTE(review): line 2953 (the signature head, per the footer index:
// foldBitCastSelect(BitCastInst &BitCast, ...)) was dropped by the
// extraction.
2954 InstCombiner::BuilderTy &Builder) {
2955 Value *Cond, *TVal, *FVal;
2956 if (!match(BitCast.getOperand(0),
2957 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2958 return nullptr;
2959
2960 // A vector select must maintain the same number of elements in its operands.
2961 Type *CondTy = Cond->getType();
2962 Type *DestTy = BitCast.getType();
2963
2964 auto *DestVecTy = dyn_cast<VectorType>(DestTy);
2965
2966 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2967 if (!DestVecTy ||
2968 CondVTy->getElementCount() != DestVecTy->getElementCount())
2969 return nullptr;
2970
2971 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2972 auto *SrcVecTy = dyn_cast<VectorType>(TVal->getType());
2973
// With a constant arm, hoist the bitcasts into both arms of the select.
2974 if ((isa<Constant>(TVal) || isa<Constant>(FVal)) &&
2975 (!DestVecTy ||
2976 (SrcVecTy && ElementCount::isKnownLE(DestVecTy->getElementCount(),
2977 SrcVecTy->getElementCount())))) {
2978 // Avoid introducing select of vector (or select of vector with more
2979 // elements) until the backend can undo this transformation.
2980 Value *CastedTVal = Builder.CreateBitCast(TVal, DestTy);
2981 Value *CastedFVal = Builder.CreateBitCast(FVal, DestTy);
2982 return SelectInst::Create(Cond, CastedTVal, CastedFVal, "", nullptr, Sel);
2983 }
2984
2985 // FIXME: This transform is restricted from changing the select between
2986 // scalars and vectors to avoid backend problems caused by creating
2987 // potentially illegal operations. If a fix-up is added to handle that
2988 // situation, we can remove this check.
2989 if ((DestVecTy != nullptr) != (SrcVecTy != nullptr))
2990 return nullptr;
2991
2992 Value *X;
2993 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2994 !isa<Constant>(X)) {
2995 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2996 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2997 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2998 }
2999
3000 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
3001 !isa<Constant>(X)) {
3002 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
3003 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
3004 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
3005 }
3006
3007 return nullptr;
3008}
3009
3010/// Check if all users of CI are StoreInsts.
3011static bool hasStoreUsersOnly(CastInst &CI) {
3012 for (User *U : CI.users()) {
3013 if (!isa<StoreInst>(U))
3014 return false;
3015 }
3016 return true;
3017}
3018
3019/// This function handles the following case:
3020///
3021/// A -> B cast
3022/// PHI
3023/// B -> A cast
3024///
3025/// All the related PHI nodes can be replaced by new PHI nodes with type A.
3026/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
3027Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
3028 PHINode *PN) {
3029 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
3030 if (hasStoreUsersOnly(CI))
3031 return nullptr;
3032
3033 Value *Src = CI.getOperand(0);
3034 Type *SrcTy = Src->getType(); // Type B
3035 Type *DestTy = CI.getType(); // Type A
3036
3037 SmallVector<PHINode *, 4> PhiWorklist;
3038 SmallSetVector<PHINode *, 4> OldPhiNodes;
3039
3040 // Find all of the A->B casts and PHI nodes.
3041 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
3042 // OldPhiNodes is used to track all known PHI nodes, before adding a new
3043 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
3044 PhiWorklist.push_back(PN);
3045 OldPhiNodes.insert(PN);
3046 while (!PhiWorklist.empty()) {
3047 auto *OldPN = PhiWorklist.pop_back_val();
3048 for (Value *IncValue : OldPN->incoming_values()) {
3049 if (isa<Constant>(IncValue))
3050 continue;
3051
3052 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
3053 // If there is a sequence of one or more load instructions, each loaded
3054 // value is used as address of later load instruction, bitcast is
3055 // necessary to change the value type, don't optimize it. For
3056 // simplicity we give up if the load address comes from another load.
3057 Value *Addr = LI->getOperand(0);
3058 if (Addr == &CI || isa<LoadInst>(Addr))
3059 return nullptr;
3060 // Don't transform "load <256 x i32>, <256 x i32>*" to
3061 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
3062 // TODO: Remove this check when bitcast between vector and x86_amx
3063 // is replaced with a specific intrinsic.
3064 if (DestTy->isX86_AMXTy())
3065 return nullptr;
3066 if (LI->hasOneUse() && LI->isSimple())
3067 continue;
3068 // If a LoadInst has more than one use, changing the type of loaded
3069 // value may create another bitcast.
3070 return nullptr;
3071 }
3072
3073 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
3074 if (OldPhiNodes.insert(PNode))
3075 PhiWorklist.push_back(PNode);
3076 continue;
3077 }
3078
3079 auto *BCI = dyn_cast<BitCastInst>(IncValue);
3080 // We can't handle other instructions.
3081 if (!BCI)
3082 return nullptr;
3083
3084 // Verify it's a A->B cast.
3085 Type *TyA = BCI->getOperand(0)->getType();
3086 Type *TyB = BCI->getType();
3087 if (TyA != DestTy || TyB != SrcTy)
3088 return nullptr;
3089 }
3090 }
3091
3092 // Check that each user of each old PHI node is something that we can
3093 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
3094 for (auto *OldPN : OldPhiNodes) {
3095 for (User *V : OldPN->users()) {
3096 if (auto *SI = dyn_cast<StoreInst>(V)) {
3097 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
3098 return nullptr;
3099 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3100 // Verify it's a B->A cast.
3101 Type *TyB = BCI->getOperand(0)->getType();
3102 Type *TyA = BCI->getType();
3103 if (TyA != DestTy || TyB != SrcTy)
3104 return nullptr;
3105 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3106 // As long as the user is another old PHI node, then even if we don't
3107 // rewrite it, the PHI web we're considering won't have any users
3108 // outside itself, so it'll be dead.
3109 if (!OldPhiNodes.contains(PHI))
3110 return nullptr;
3111 } else {
3112 return nullptr;
3113 }
3114 }
3115 }
3116
3117 // For each old PHI node, create a corresponding new PHI node with a type A.
3118 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
3119 for (auto *OldPN : OldPhiNodes) {
3120 Builder.SetInsertPoint(OldPN);
3121 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
3122 NewPNodes[OldPN] = NewPN;
3123 }
3124
3125 // Fill in the operands of new PHI nodes.
3126 for (auto *OldPN : OldPhiNodes) {
3127 PHINode *NewPN = NewPNodes[OldPN];
3128 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
3129 Value *V = OldPN->getOperand(j);
3130 Value *NewV = nullptr;
3131 if (auto *C = dyn_cast<Constant>(V)) {
3132 NewV = ConstantExpr::getBitCast(C, DestTy);
3133 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
3134 // Explicitly perform load combine to make sure no opposing transform
3135 // can remove the bitcast in the meantime and trigger an infinite loop.
3136 Builder.SetInsertPoint(LI);
3137 NewV = combineLoadToNewType(*LI, DestTy);
3138 // Remove the old load and its use in the old phi, which itself becomes
3139 // dead once the whole transform finishes.
3140 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
// NOTE(review): line 3141 (presumably the erasure of the old load) was
// dropped by the extraction -- confirm against upstream.
3142 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3143 NewV = BCI->getOperand(0);
3144 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
3145 NewV = NewPNodes[PrevPN];
3146 }
3147 assert(NewV);
3148 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
3149 }
3150 }
3151
3152 // Traverse all accumulated PHI nodes and process its users,
3153 // which are Stores and BitCasts. Without this processing
3154 // NewPHI nodes could be replicated and could lead to extra
3155 // moves generated after DeSSA.
3156 // If there is a store with type B, change it to type A.
3157
3158
3159 // Replace users of BitCast B->A with NewPHI. These will help
3160 // later to get rid of a closure formed by OldPHI nodes.
3161 Instruction *RetVal = nullptr;
3162 for (auto *OldPN : OldPhiNodes) {
3163 PHINode *NewPN = NewPNodes[OldPN];
3164 for (User *V : make_early_inc_range(OldPN->users())) {
3165 if (auto *SI = dyn_cast<StoreInst>(V)) {
3166 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
3167 Builder.SetInsertPoint(SI);
3168 auto *NewBC =
3169 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
3170 SI->setOperand(0, NewBC);
3171 Worklist.push(SI);
3172 assert(hasStoreUsersOnly(*NewBC));
3173 }
3174 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3175 Type *TyB = BCI->getOperand(0)->getType();
3176 Type *TyA = BCI->getType();
3177 assert(TyA == DestTy && TyB == SrcTy);
3178 (void) TyA;
3179 (void) TyB;
3180 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
// Remember the replacement for the cast we were asked about so the caller
// gets a non-null result exactly when CI itself was rewritten.
3181 if (BCI == &CI)
3182 RetVal = I;
3183 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3184 assert(OldPhiNodes.contains(PHI));
3185 (void) PHI;
3186 } else {
3187 llvm_unreachable("all uses should be handled");
3188 }
3189 }
3190 }
3191
3192 return RetVal;
3193}
3194
3195/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
3196/// copysign((bitcast Y to fp), X)
// NOTE(review): line 3197 (the signature head, per the footer index:
// foldCopySignIdioms(BitCastInst &CI, ...)) and lines 3204-3205 (the
// or/and/signmask match pattern) were dropped by the extraction.
3198 InstCombiner::BuilderTy &Builder,
3199 const SimplifyQuery &SQ) {
3200 Value *X, *Y;
3201 Type *FTy = CI.getType();
3202 if (!FTy->isFPOrFPVectorTy())
3203 return nullptr;
3206 m_Value(Y)))))
3207 return nullptr;
3208 if (X->getType() != FTy)
3209 return nullptr;
// Y must be known non-negative so its sign bit is zero and the 'or' only
// contributes X's sign.
3210 if (!isKnownNonNegative(Y, SQ))
3211 return nullptr;
3212
3213 return Builder.CreateCopySign(Builder.CreateBitCast(Y, FTy), X);
3214}
3215
// NOTE(review): the visitBitCast signature (line 3216) and several interior
// lines (3237, 3257, 3272, 3351, 3354 -- fold-call heads and match patterns)
// were dropped by the extraction; the gaps in the embedded numbering below
// mark them. Confirm the missing text against upstream before editing.
3217 // If the operands are integer typed then apply the integer transforms,
3218 // otherwise just apply the common ones.
3219 Value *Src = CI.getOperand(0);
3220 Type *SrcTy = Src->getType();
3221 Type *DestTy = CI.getType();
3222
3223 // Get rid of casts from one type to the same type. These are useless and can
3224 // be replaced by the operand.
3225 if (DestTy == Src->getType())
3226 return replaceInstUsesWith(CI, Src);
3227
3228 if (isa<FixedVectorType>(DestTy)) {
3229 if (isa<IntegerType>(SrcTy)) {
3230 // If this is a cast from an integer to vector, check to see if the input
3231 // is a trunc or zext of a bitcast from vector. If so, we can replace all
3232 // the casts with a shuffle and (potentially) a bitcast.
3233 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
3234 CastInst *SrcCast = cast<CastInst>(Src);
3235 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
3236 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
3238 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
3239 return I;
3240 }
3241
3242 // If the input is an 'or' instruction, we may be doing shifts and ors to
3243 // assemble the elements of the vector manually. Try to rip the code out
3244 // and replace it with insertelements.
3245 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
3246 return replaceInstUsesWith(CI, V);
3247 }
3248 }
3249
3250 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
3251 if (SrcVTy->getNumElements() == 1) {
3252 // If our destination is not a vector, then make this a straight
3253 // scalar-scalar cast.
3254 if (!DestTy->isVectorTy()) {
3255 Value *Elem =
3256 Builder.CreateExtractElement(Src,
3258 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
3259 }
3260
3261 // Otherwise, see if our source is an insert. If so, then use the scalar
3262 // component directly:
3263 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
3264 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
3265 return new BitCastInst(InsElt->getOperand(1), DestTy);
3266 }
3267
3268 // Convert an artificial vector insert into more analyzable bitwise logic.
3269 unsigned BitWidth = DestTy->getScalarSizeInBits();
3270 Value *X, *Y;
3271 uint64_t IndexC;
3273 m_Value(Y), m_ConstantInt(IndexC)))) &&
3274 DestTy->isIntegerTy() && X->getType() == DestTy &&
3275 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
3276 // Adjust for big endian - the LSBs are at the high index.
3277 if (DL.isBigEndian())
3278 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
3279
3280 // We only handle (endian-normalized) insert to index 0. Any other insert
3281 // would require a left-shift, so that is an extra instruction.
3282 if (IndexC == 0) {
3283 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
3284 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
3285 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
3286 Value *AndX = Builder.CreateAnd(X, MaskC);
3287 Value *ZextY = Builder.CreateZExt(Y, DestTy);
3288 return BinaryOperator::CreateOr(AndX, ZextY);
3289 }
3290 }
3291 }
3292
3293 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
3294 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
3295 // a bitcast to a vector with the same # elts.
3296 Value *ShufOp0 = Shuf->getOperand(0);
3297 Value *ShufOp1 = Shuf->getOperand(1);
3298 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
3299 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
3300 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
3301 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
3302 ShufElts == SrcVecElts) {
3303 BitCastInst *Tmp;
3304 // If either of the operands is a cast from CI.getType(), then
3305 // evaluating the shuffle in the casted destination's type will allow
3306 // us to eliminate at least one cast.
3307 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
3308 Tmp->getOperand(0)->getType() == DestTy) ||
3309 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
3310 Tmp->getOperand(0)->getType() == DestTy)) {
3311 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
3312 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
3313 // Return a new shuffle vector. Use the same element ID's, as we
3314 // know the vector types match #elts.
3315 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
3316 }
3317 }
3318
3319 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
3320 // as a byte/bit swap:
3321 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
3322 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
3323 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
3324 Shuf->hasOneUse() && Shuf->isReverse()) {
3325 unsigned IntrinsicNum = 0;
3326 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
3327 SrcTy->getScalarSizeInBits() == 8) {
3328 IntrinsicNum = Intrinsic::bswap;
3329 } else if (SrcTy->getScalarSizeInBits() == 1) {
3330 IntrinsicNum = Intrinsic::bitreverse;
3331 }
3332 if (IntrinsicNum != 0) {
3333 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
3334 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
3335 Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
3336 CI.getModule(), IntrinsicNum, DestTy);
3337 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
3338 return CallInst::Create(BswapOrBitreverse, {ScalarX});
3339 }
3340 }
3341 }
3342
3343 // Handle the A->B->A cast, and there is an intervening PHI node.
3344 if (PHINode *PN = dyn_cast<PHINode>(Src))
3345 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
3346 return I;
3347
3348 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
3349 return I;
3350
3352 return I;
3353
3355 return I;
3356
3357 if (Value *V = foldCopySignIdioms(CI, Builder, SQ.getWithInstruction(&CI)))
3358 return replaceInstUsesWith(CI, V);
3359
3360 return commonCastTransforms(CI);
3361}
3362
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This file defines the DenseMap class.
static bool isSigned(unsigned Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static Value * foldCopySignIdioms(BitCastInst &CI, InstCombiner::BuilderTy &Builder, const SimplifyQuery &SQ)
Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to copysign((bitcast Y to fp),...
static Type * shrinkFPConstantVector(Value *V, bool PreferBFloat)
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Instruction * foldFPtoI(Instruction &FI, InstCombiner &IC)
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(APFloat F, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
SmallDenseMap< Value *, Value *, 8 > EvaluatedMap
static Type * getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC)
Find the minimum FP type we can safely truncate to.
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static Type * shrinkFPConstant(LLVMContext &Ctx, const APFloat &F, bool PreferBFloat)
static Instruction * foldVecExtTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Whenever an element is extracted from a vector, optionally shifted down, and then truncated,...
static Value * EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned, InstCombinerImpl &IC, EvaluatedMap &Processed)
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
Value * RHS
Value * LHS
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:228
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1615
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1708
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
int32_t exactLogBase2() const
Definition APInt.h:1806
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
This class represents a conversion between pointers from one address space to another.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:39
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:244
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:248
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Type * getSrcTy() const
Return the source type, as a convenience.
Definition InstrTypes.h:615
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
static LLVM_ABI unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, const DataLayout *DL)
Determine how a pair of casts can be eliminated, if they can be at all.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static LLVM_ABI CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Type * getDestTy() const
Return the destination type, as a convenience.
Definition InstrTypes.h:617
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValueAPF() const
Definition Constants.h:463
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
bool uge(uint64_t Num) const
This function will return true iff this constant represents a value with active bits bigger than 64 b...
Definition Constants.h:262
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2622
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition IRBuilder.h:534
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:529
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2237
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate type has enou...
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitFPTrunc(FPTruncInst &CI)
Value * foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Instruction * visitPtrToAddr(PtrToAddrInst &CI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
bool canBeCastedExactlyIntToFP(Value *V, Type *FPTy, bool IsSigned, const Instruction *CxtI=nullptr) const
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
bool isKnownExactCastIntToFP(CastInst &I) const
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
BuilderTy & Builder
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool hasNonNeg() const LLVM_READONLY
Determine whether the the nneg flag is set.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents a cast from a pointer to an address (non-capturing ptrtoint).
Value * getPointerOperand()
Gets the pointer operand.
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:287
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition Type.h:202
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:291
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:241
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
'undef' values are things that do not have specified contents.
Definition Constants.h:1612
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
This class represents zero extension of integer types.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:237
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_lowbit_mask > m_LowBitMask()
Match an integer or vector with only the low bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrToIntSameSize_match< OpTy > m_PtrToIntSameSize(const DataLayout &DL, const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, UIToFPInst >, CastInst_match< OpTy, SIToFPInst > > m_IToFP(const OpTy &Op)
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_or< CastInst_match< OpTy, FPToUIInst >, CastInst_match< OpTy, FPToSIInst > > m_FPToI(const OpTy &Op)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match a integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_Ctlz(const Opnd0 &Op0, const Opnd1 &Op1)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition MathExtras.h:350
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI Value * simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q)
Given operands for a CastInst, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2429
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2165
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
Matching combinators.
SimplifyQuery getWithInstruction(const Instruction *I) const