LLVM 22.0.0git
InstCombineCasts.cpp
Go to the documentation of this file.
1//===- InstCombineCasts.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for cast operations.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/DebugInfo.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Type.h"
26#include "llvm/IR/Value.h"
29#include <iterator>
30#include <optional>
31
32using namespace llvm;
33using namespace PatternMatch;
34
35#define DEBUG_TYPE "instcombine"
36
38
41 EvaluatedMap &Processed) {
42 // Since we cover transformation of instructions with multiple users, we might
43 // come to the same node via multiple paths. We should not create a
44 // replacement for every single one of them though.
45 if (Value *Result = Processed.lookup(V))
46 return Result;
47
50
51 // Otherwise, it must be an instruction.
53 Instruction *Res = nullptr;
54 unsigned Opc = I->getOpcode();
55 switch (Opc) {
56 case Instruction::Add:
57 case Instruction::Sub:
58 case Instruction::Mul:
59 case Instruction::And:
60 case Instruction::Or:
61 case Instruction::Xor:
62 case Instruction::AShr:
63 case Instruction::LShr:
64 case Instruction::Shl:
65 case Instruction::UDiv:
66 case Instruction::URem: {
67 Value *LHS = EvaluateInDifferentTypeImpl(I->getOperand(0), Ty, isSigned, IC,
68 Processed);
69 Value *RHS = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned, IC,
70 Processed);
72 if (Opc == Instruction::LShr || Opc == Instruction::AShr)
73 Res->setIsExact(I->isExact());
74 break;
75 }
76 case Instruction::Trunc:
77 case Instruction::ZExt:
78 case Instruction::SExt:
79 // If the source type of the cast is the type we're trying for then we can
80 // just return the source. There's no need to insert it because it is not
81 // new.
82 if (I->getOperand(0)->getType() == Ty)
83 return I->getOperand(0);
84
85 // Otherwise, must be the same type of cast, so just reinsert a new one.
86 // This also handles the case of zext(trunc(x)) -> zext(x).
87 Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
88 Opc == Instruction::SExt);
89 break;
90 case Instruction::Select: {
91 Value *True = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned,
92 IC, Processed);
93 Value *False = EvaluateInDifferentTypeImpl(I->getOperand(2), Ty, isSigned,
94 IC, Processed);
95 Res = SelectInst::Create(I->getOperand(0), True, False);
96 break;
97 }
98 case Instruction::PHI: {
99 PHINode *OPN = cast<PHINode>(I);
101 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
103 isSigned, IC, Processed);
104 NPN->addIncoming(V, OPN->getIncomingBlock(i));
105 }
106 Res = NPN;
107 break;
108 }
109 case Instruction::FPToUI:
110 case Instruction::FPToSI:
111 Res = CastInst::Create(static_cast<Instruction::CastOps>(Opc),
112 I->getOperand(0), Ty);
113 break;
114 case Instruction::Call:
116 switch (II->getIntrinsicID()) {
117 default:
118 llvm_unreachable("Unsupported call!");
119 case Intrinsic::vscale: {
121 I->getModule(), Intrinsic::vscale, {Ty});
122 Res = CallInst::Create(Fn->getFunctionType(), Fn);
123 break;
124 }
125 }
126 }
127 break;
128 case Instruction::ShuffleVector: {
129 auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
130 auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
131 auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
132 Value *Op0 = EvaluateInDifferentTypeImpl(I->getOperand(0), FixedTy,
133 isSigned, IC, Processed);
134 Value *Op1 = EvaluateInDifferentTypeImpl(I->getOperand(1), FixedTy,
135 isSigned, IC, Processed);
136 Res = new ShuffleVectorInst(Op0, Op1,
137 cast<ShuffleVectorInst>(I)->getShuffleMask());
138 break;
139 }
140 default:
141 // TODO: Can handle more cases here.
142 llvm_unreachable("Unreachable!");
143 }
144
145 Res->takeName(I);
146 Value *Result = IC.InsertNewInstWith(Res, I->getIterator());
147 // There is no need in keeping track of the old value/new value relationship
148 // when we have only one user, we came have here from that user and no-one
149 // else cares.
150 if (!V->hasOneUse())
151 Processed[V] = Result;
152
153 return Result;
154}
155
156/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
157/// true for, actually insert the code to evaluate the expression.
159 bool isSigned) {
160 EvaluatedMap Processed;
161 return EvaluateInDifferentTypeImpl(V, Ty, isSigned, *this, Processed);
162}
163
165InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
166 const CastInst *CI2) {
167 Type *SrcTy = CI1->getSrcTy();
168 Type *MidTy = CI1->getDestTy();
169 Type *DstTy = CI2->getDestTy();
170
171 Instruction::CastOps firstOp = CI1->getOpcode();
172 Instruction::CastOps secondOp = CI2->getOpcode();
173 Type *SrcIntPtrTy =
174 SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
175 Type *DstIntPtrTy =
176 DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
177 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
178 DstTy, &DL);
179
180 // We don't want to form an inttoptr or ptrtoint that converts to an integer
181 // type that differs from the pointer size.
182 if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
183 (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
184 Res = 0;
185
186 return Instruction::CastOps(Res);
187}
188
189/// Implement the transforms common to all CastInst visitors.
191 Value *Src = CI.getOperand(0);
192 Type *Ty = CI.getType();
193
194 if (Value *Res =
195 simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
196 return replaceInstUsesWith(CI, Res);
197
198 // Try to eliminate a cast of a cast.
199 if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
200 if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
201 // The first cast (CSrc) is eliminable so we need to fix up or replace
202 // the second cast (CI). CSrc will then have a good chance of being dead.
203 auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
204 // Point debug users of the dying cast to the new one.
205 if (CSrc->hasOneUse())
206 replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
207 return Res;
208 }
209 }
210
211 if (auto *Sel = dyn_cast<SelectInst>(Src)) {
212 // We are casting a select. Try to fold the cast into the select if the
213 // select does not have a compare instruction with matching operand types
214 // or the select is likely better done in a narrow type.
215 // Creating a select with operands that are different sizes than its
216 // condition may inhibit other folds and lead to worse codegen.
217 auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
218 if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
219 (CI.getOpcode() == Instruction::Trunc &&
220 shouldChangeType(CI.getSrcTy(), CI.getType()))) {
221
222 // If it's a bitcast involving vectors, make sure it has the same number
223 // of elements on both sides.
224 if (CI.getOpcode() != Instruction::BitCast ||
226 if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
227 replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
228 return NV;
229 }
230 }
231 }
232 }
233
234 // If we are casting a PHI, then fold the cast into the PHI.
235 if (auto *PN = dyn_cast<PHINode>(Src)) {
236 // Don't do this if it would create a PHI node with an illegal type from a
237 // legal type.
238 if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
239 shouldChangeType(CI.getSrcTy(), CI.getType()))
240 if (Instruction *NV = foldOpIntoPhi(CI, PN))
241 return NV;
242 }
243
244 // Canonicalize a unary shuffle after the cast if neither operation changes
245 // the size or element size of the input vector.
246 // TODO: We could allow size-changing ops if that doesn't harm codegen.
247 // cast (shuffle X, Mask) --> shuffle (cast X), Mask
248 Value *X;
249 ArrayRef<int> Mask;
250 if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
251 // TODO: Allow scalable vectors?
252 auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
253 auto *DestTy = dyn_cast<FixedVectorType>(Ty);
254 if (SrcTy && DestTy &&
255 SrcTy->getNumElements() == DestTy->getNumElements() &&
256 SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
257 Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
258 return new ShuffleVectorInst(CastX, Mask);
259 }
260 }
261
262 return nullptr;
263}
264
265namespace {
266
267/// Helper class for evaluating whether a value can be computed in a different
268/// type without changing its value. Used by cast simplification transforms.
269class TypeEvaluationHelper {
270public:
271 /// Return true if we can evaluate the specified expression tree as type Ty
272 /// instead of its larger type, and arrive with the same value.
273 /// This is used by code that tries to eliminate truncates.
274 [[nodiscard]] static bool canEvaluateTruncated(Value *V, Type *Ty,
276 Instruction *CxtI);
277
278 /// Determine if the specified value can be computed in the specified wider
279 /// type and produce the same low bits. If not, return false.
280 [[nodiscard]] static bool canEvaluateZExtd(Value *V, Type *Ty,
281 unsigned &BitsToClear,
283 Instruction *CxtI);
284
285 /// Return true if we can take the specified value and return it as type Ty
286 /// without inserting any new casts and without changing the value of the
287 /// common low bits.
288 [[nodiscard]] static bool canEvaluateSExtd(Value *V, Type *Ty);
289
290private:
291 /// Constants and extensions/truncates from the destination type are always
292 /// free to be evaluated in that type.
293 [[nodiscard]] static bool canAlwaysEvaluateInType(Value *V, Type *Ty);
294
295 /// Check if we traversed all the users of the multi-use values we've seen.
296 [[nodiscard]] bool allPendingVisited() const {
297 return llvm::all_of(Pending,
298 [this](Value *V) { return Visited.contains(V); });
299 }
300
301 /// A generic wrapper for canEvaluate* recursions to inject visitation
302 /// tracking and enforce correct multi-use value evaluations.
303 [[nodiscard]] bool
304 canEvaluate(Value *V, Type *Ty,
305 llvm::function_ref<bool(Value *, Type *Type)> Pred) {
306 if (canAlwaysEvaluateInType(V, Ty))
307 return true;
308
309 auto *I = dyn_cast<Instruction>(V);
310
311 if (I == nullptr)
312 return false;
313
314 // We insert false by default to return false when we encounter user loops.
315 const auto [It, Inserted] = Visited.insert({V, false});
316
317 // There are three possible cases for us having information on this value
318 // in the Visited map:
319 // 1. We properly checked it and concluded that we can evaluate it (true)
320 // 2. We properly checked it and concluded that we can't (false)
321 // 3. We started to check it, but during the recursive traversal we came
322 // back to it.
323 //
324 // For cases 1 and 2, we can safely return the stored result. For case 3, we
325 // can potentially have a situation where we can evaluate recursive user
326 // chains, but that can be quite tricky to do properly and isntead, we
327 // return false.
328 //
329 // In any case, we should return whatever was there in the map to begin
330 // with.
331 if (!Inserted)
332 return It->getSecond();
333
334 // We can easily make a decision about single-user values whether they can
335 // be evaluated in a different type or not, we came from that user. This is
336 // not as simple for multi-user values.
337 //
338 // In general, we have the following case (inverted control-flow, users are
339 // at the top):
340 //
341 // Cast %A
342 // ____|
343 // /
344 // %A = Use %B, %C
345 // ________| |
346 // / |
347 // %B = Use %D |
348 // ________| |
349 // / |
350 // %D = Use %C |
351 // ________|___|
352 // /
353 // %C = ...
354 //
355 // In this case, when we check %A, %B and %D, we are confident that we can
356 // make the decision here and now, since we came from their only users.
357 //
358 // For %C, it is harder. We come there twice, and when we come the first
359 // time, it's hard to tell if we will visit the second user (technically
360 // it's not hard, but we might need a lot of repetitive checks with non-zero
361 // cost).
362 //
363 // In the case above, we are allowed to evaluate %C in different type
364 // because all of it users were part of the traversal.
365 //
366 // In the following case, however, we can't make this conclusion:
367 //
368 // Cast %A
369 // ____|
370 // /
371 // %A = Use %B, %C
372 // ________| |
373 // / |
374 // %B = Use %D |
375 // ________| |
376 // / |
377 // %D = Use %C |
378 // | |
379 // foo(%C) | | <- never traversing foo(%C)
380 // ________|___|
381 // /
382 // %C = ...
383 //
384 // In this case, we still can evaluate %C in a different type, but we'd need
385 // to create a copy of the original %C to be used in foo(%C). Such
386 // duplication might be not profitable.
387 //
388 // For this reason, we collect all users of the mult-user values and mark
389 // them as "pending" and defer this decision to the very end. When we are
390 // done and and ready to have a positive verdict, we should double-check all
391 // of the pending users and ensure that we visited them. allPendingVisited
392 // predicate checks exactly that.
393 if (!I->hasOneUse())
394 llvm::append_range(Pending, I->users());
395
396 const bool Result = Pred(V, Ty);
397 // We have to set result this way and not via It because Pred is recursive
398 // and it is very likely that we grew Visited and invalidated It.
399 Visited[V] = Result;
400 return Result;
401 }
402
403 /// Filter out values that we can not evaluate in the destination type for
404 /// free.
405 [[nodiscard]] bool canNotEvaluateInType(Value *V, Type *Ty);
406
407 [[nodiscard]] bool canEvaluateTruncatedImpl(Value *V, Type *Ty,
408 InstCombinerImpl &IC,
409 Instruction *CxtI);
410 [[nodiscard]] bool canEvaluateTruncatedPred(Value *V, Type *Ty,
411 InstCombinerImpl &IC,
412 Instruction *CxtI);
413 [[nodiscard]] bool canEvaluateZExtdImpl(Value *V, Type *Ty,
414 unsigned &BitsToClear,
415 InstCombinerImpl &IC,
416 Instruction *CxtI);
417 [[nodiscard]] bool canEvaluateSExtdImpl(Value *V, Type *Ty);
418 [[nodiscard]] bool canEvaluateSExtdPred(Value *V, Type *Ty);
419
420 /// A bookkeeping map to memorize an already made decision for a traversed
421 /// value.
422 SmallDenseMap<Value *, bool, 8> Visited;
423
424 /// A list of pending values to check in the end.
425 SmallVector<Value *, 8> Pending;
426};
427
428} // anonymous namespace
429
430/// Constants and extensions/truncates from the destination type are always
431/// free to be evaluated in that type. This is a helper for canEvaluate*.
432bool TypeEvaluationHelper::canAlwaysEvaluateInType(Value *V, Type *Ty) {
433 if (isa<Constant>(V))
434 return match(V, m_ImmConstant());
435
436 Value *X;
437 if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
438 X->getType() == Ty)
439 return true;
440
441 return false;
442}
443
444/// Filter out values that we can not evaluate in the destination type for free.
445/// This is a helper for canEvaluate*.
446bool TypeEvaluationHelper::canNotEvaluateInType(Value *V, Type *Ty) {
447 if (!isa<Instruction>(V))
448 return true;
449 // We don't extend or shrink something that has multiple uses -- doing so
450 // would require duplicating the instruction which isn't profitable.
451 if (!V->hasOneUse())
452 return true;
453
454 return false;
455}
456
457/// Return true if we can evaluate the specified expression tree as type Ty
458/// instead of its larger type, and arrive with the same value.
459/// This is used by code that tries to eliminate truncates.
460///
461/// Ty will always be a type smaller than V. We should return true if trunc(V)
462/// can be computed by computing V in the smaller type. If V is an instruction,
463/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
464/// makes sense if x and y can be efficiently truncated.
465///
466/// This function works on both vectors and scalars.
467///
468bool TypeEvaluationHelper::canEvaluateTruncated(Value *V, Type *Ty,
470 Instruction *CxtI) {
471 TypeEvaluationHelper TYH;
472 return TYH.canEvaluateTruncatedImpl(V, Ty, IC, CxtI) &&
473 // We need to check whether we visited all users of multi-user values,
474 // and we have to do it at the very end, outside of the recursion.
475 TYH.allPendingVisited();
476}
477
478bool TypeEvaluationHelper::canEvaluateTruncatedImpl(Value *V, Type *Ty,
480 Instruction *CxtI) {
481 return canEvaluate(V, Ty, [this, &IC, CxtI](Value *V, Type *Ty) {
482 return canEvaluateTruncatedPred(V, Ty, IC, CxtI);
483 });
484}
485
486bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value *V, Type *Ty,
488 Instruction *CxtI) {
489 auto *I = cast<Instruction>(V);
490 Type *OrigTy = V->getType();
491 switch (I->getOpcode()) {
492 case Instruction::Add:
493 case Instruction::Sub:
494 case Instruction::Mul:
495 case Instruction::And:
496 case Instruction::Or:
497 case Instruction::Xor:
498 // These operators can all arbitrarily be extended or truncated.
499 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
500 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
501
502 case Instruction::UDiv:
503 case Instruction::URem: {
504 // UDiv and URem can be truncated if all the truncated bits are zero.
505 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
506 uint32_t BitWidth = Ty->getScalarSizeInBits();
507 assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
508 APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
509 // Do not preserve the original context instruction. Simplifying div/rem
510 // based on later context may introduce a trap.
511 if (IC.MaskedValueIsZero(I->getOperand(0), Mask, I) &&
512 IC.MaskedValueIsZero(I->getOperand(1), Mask, I)) {
513 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
514 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
515 }
516 break;
517 }
518 case Instruction::Shl: {
519 // If we are truncating the result of this SHL, and if it's a shift of an
520 // inrange amount, we can always perform a SHL in a smaller type.
521 uint32_t BitWidth = Ty->getScalarSizeInBits();
522 KnownBits AmtKnownBits =
523 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
524 if (AmtKnownBits.getMaxValue().ult(BitWidth))
525 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
526 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
527 break;
528 }
529 case Instruction::LShr: {
530 // If this is a truncate of a logical shr, we can truncate it to a smaller
531 // lshr iff we know that the bits we would otherwise be shifting in are
532 // already zeros.
533 // TODO: It is enough to check that the bits we would be shifting in are
534 // zero - use AmtKnownBits.getMaxValue().
535 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
536 uint32_t BitWidth = Ty->getScalarSizeInBits();
537 KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), CxtI);
538 APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
539 APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
540 if (MaxShiftAmt.ult(BitWidth)) {
541 // If the only user is a trunc then we can narrow the shift if any new
542 // MSBs are not going to be used.
543 if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
544 auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
545 if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
546 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
547 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
548 }
549 if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, CxtI))
550 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
551 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
552 }
553 break;
554 }
555 case Instruction::AShr: {
556 // If this is a truncate of an arithmetic shr, we can truncate it to a
557 // smaller ashr iff we know that all the bits from the sign bit of the
558 // original type and the sign bit of the truncate type are similar.
559 // TODO: It is enough to check that the bits we would be shifting in are
560 // similar to sign bit of the truncate type.
561 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
562 uint32_t BitWidth = Ty->getScalarSizeInBits();
563 KnownBits AmtKnownBits =
564 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
565 unsigned ShiftedBits = OrigBitWidth - BitWidth;
566 if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
567 ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), CxtI))
568 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
569 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
570 break;
571 }
572 case Instruction::Trunc:
573 // trunc(trunc(x)) -> trunc(x)
574 return true;
575 case Instruction::ZExt:
576 case Instruction::SExt:
577 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
578 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
579 return true;
580 case Instruction::Select: {
582 return canEvaluateTruncatedImpl(SI->getTrueValue(), Ty, IC, CxtI) &&
583 canEvaluateTruncatedImpl(SI->getFalseValue(), Ty, IC, CxtI);
584 }
585 case Instruction::PHI: {
586 // We can change a phi if we can change all operands. Note that we never
587 // get into trouble with cyclic PHIs here because canEvaluate handles use
588 // chain loops.
589 PHINode *PN = cast<PHINode>(I);
590 return llvm::all_of(
591 PN->incoming_values(), [this, Ty, &IC, CxtI](Value *IncValue) {
592 return canEvaluateTruncatedImpl(IncValue, Ty, IC, CxtI);
593 });
594 }
595 case Instruction::FPToUI:
596 case Instruction::FPToSI: {
597 // If the integer type can hold the max FP value, it is safe to cast
598 // directly to that type. Otherwise, we may create poison via overflow
599 // that did not exist in the original code.
600 Type *InputTy = I->getOperand(0)->getType()->getScalarType();
601 const fltSemantics &Semantics = InputTy->getFltSemantics();
602 uint32_t MinBitWidth = APFloatBase::semanticsIntSizeInBits(
603 Semantics, I->getOpcode() == Instruction::FPToSI);
604 return Ty->getScalarSizeInBits() >= MinBitWidth;
605 }
606 case Instruction::ShuffleVector:
607 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
608 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
609
610 default:
611 // TODO: Can handle more cases here.
612 break;
613 }
614
615 return false;
616}
617
618/// Given a vector that is bitcast to an integer, optionally logically
619/// right-shifted, and truncated, convert it to an extractelement.
620/// Example (big endian):
621/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
622/// --->
623/// extractelement <4 x i32> %X, 1
625 InstCombinerImpl &IC) {
626 Value *TruncOp = Trunc.getOperand(0);
627 Type *DestType = Trunc.getType();
628 if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
629 return nullptr;
630
631 Value *VecInput = nullptr;
632 ConstantInt *ShiftVal = nullptr;
633 if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
634 m_LShr(m_BitCast(m_Value(VecInput)),
635 m_ConstantInt(ShiftVal)))) ||
636 !isa<VectorType>(VecInput->getType()))
637 return nullptr;
638
639 VectorType *VecType = cast<VectorType>(VecInput->getType());
640 unsigned VecWidth = VecType->getPrimitiveSizeInBits();
641 unsigned DestWidth = DestType->getPrimitiveSizeInBits();
642 unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
643
644 if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
645 return nullptr;
646
647 // If the element type of the vector doesn't match the result type,
648 // bitcast it to a vector type that we can extract from.
649 unsigned NumVecElts = VecWidth / DestWidth;
650 if (VecType->getElementType() != DestType) {
651 VecType = FixedVectorType::get(DestType, NumVecElts);
652 VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
653 }
654
655 unsigned Elt = ShiftAmount / DestWidth;
656 if (IC.getDataLayout().isBigEndian())
657 Elt = NumVecElts - 1 - Elt;
658
659 return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
660}
661
662/// Whenever an element is extracted from a vector, optionally shifted down, and
663/// then truncated, canonicalize by converting it to a bitcast followed by an
664/// extractelement.
665///
666/// Examples (little endian):
667/// trunc (extractelement <4 x i64> %X, 0) to i32
668/// --->
669/// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
670///
671/// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
672/// --->
673/// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
675 InstCombinerImpl &IC) {
676 Value *Src = Trunc.getOperand(0);
677 Type *SrcType = Src->getType();
678 Type *DstType = Trunc.getType();
679
680 // Only attempt this if we have simple aliasing of the vector elements.
681 // A badly fit destination size would result in an invalid cast.
682 unsigned SrcBits = SrcType->getScalarSizeInBits();
683 unsigned DstBits = DstType->getScalarSizeInBits();
684 unsigned TruncRatio = SrcBits / DstBits;
685 if ((SrcBits % DstBits) != 0)
686 return nullptr;
687
688 Value *VecOp;
689 ConstantInt *Cst;
690 const APInt *ShiftAmount = nullptr;
691 if (!match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)))) &&
692 !match(Src,
694 m_APInt(ShiftAmount)))))
695 return nullptr;
696
697 auto *VecOpTy = cast<VectorType>(VecOp->getType());
698 auto VecElts = VecOpTy->getElementCount();
699
700 uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
701 uint64_t VecOpIdx = Cst->getZExtValue();
702 uint64_t NewIdx = IC.getDataLayout().isBigEndian()
703 ? (VecOpIdx + 1) * TruncRatio - 1
704 : VecOpIdx * TruncRatio;
705
706 // Adjust index by the whole number of truncated elements.
707 if (ShiftAmount) {
708 // Check shift amount is in range and shifts a whole number of truncated
709 // elements.
710 if (ShiftAmount->uge(SrcBits) || ShiftAmount->urem(DstBits) != 0)
711 return nullptr;
712
713 uint64_t IdxOfs = ShiftAmount->udiv(DstBits).getZExtValue();
714 NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
715 : (NewIdx + IdxOfs);
716 }
717
718 assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
719 NewIdx <= std::numeric_limits<uint32_t>::max() && "overflow 32-bits");
720
721 auto *BitCastTo =
722 VectorType::get(DstType, BitCastNumElts, VecElts.isScalable());
723 Value *BitCast = IC.Builder.CreateBitCast(VecOp, BitCastTo);
724 return ExtractElementInst::Create(BitCast, IC.Builder.getInt32(NewIdx));
725}
726
727/// Funnel/Rotate left/right may occur in a wider type than necessary because of
728/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
729Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
730 assert((isa<VectorType>(Trunc.getSrcTy()) ||
731 shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
732 "Don't narrow to an illegal scalar type");
733
734 // Bail out on strange types. It is possible to handle some of these patterns
735 // even with non-power-of-2 sizes, but it is not a likely scenario.
736 Type *DestTy = Trunc.getType();
737 unsigned NarrowWidth = DestTy->getScalarSizeInBits();
738 unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
739 if (!isPowerOf2_32(NarrowWidth))
740 return nullptr;
741
742 // First, find an or'd pair of opposite shifts:
743 // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
744 BinaryOperator *Or0, *Or1;
745 if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
746 return nullptr;
747
748 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
749 if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
750 !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
751 Or0->getOpcode() == Or1->getOpcode())
752 return nullptr;
753
754 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
755 if (Or0->getOpcode() == BinaryOperator::LShr) {
756 std::swap(Or0, Or1);
757 std::swap(ShVal0, ShVal1);
758 std::swap(ShAmt0, ShAmt1);
759 }
760 assert(Or0->getOpcode() == BinaryOperator::Shl &&
761 Or1->getOpcode() == BinaryOperator::LShr &&
762 "Illegal or(shift,shift) pair");
763
764 // Match the shift amount operands for a funnel/rotate pattern. This always
765 // matches a subtraction on the R operand.
766 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
767 // The shift amounts may add up to the narrow bit width:
768 // (shl ShVal0, L) | (lshr ShVal1, Width - L)
769 // If this is a funnel shift (different operands are shifted), then the
770 // shift amount can not over-shift (create poison) in the narrow type.
771 unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
772 APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
773 if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
774 if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
775 return L;
776
777 // The following patterns currently only work for rotation patterns.
778 // TODO: Add more general funnel-shift compatible patterns.
779 if (ShVal0 != ShVal1)
780 return nullptr;
781
782 // The shift amount may be masked with negation:
783 // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
784 Value *X;
785 unsigned Mask = Width - 1;
786 if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
788 return X;
789
790 // Same as above, but the shift amount may be extended after masking:
791 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
793 return X;
794
795 return nullptr;
796 };
797
798 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
799 bool IsFshl = true; // Sub on LSHR.
800 if (!ShAmt) {
801 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
802 IsFshl = false; // Sub on SHL.
803 }
804 if (!ShAmt)
805 return nullptr;
806
807 // The right-shifted value must have high zeros in the wide type (for example
808 // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
809 // truncated, so those do not matter.
810 APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
811 if (!MaskedValueIsZero(ShVal1, HiBitMask, &Trunc))
812 return nullptr;
813
814 // Adjust the width of ShAmt for narrowed funnel shift operation:
815 // - Zero-extend if ShAmt is narrower than the destination type.
816 // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
817 // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
818 // zext/trunc(ShAmt)).
819 Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);
820
821 Value *X, *Y;
822 X = Y = Builder.CreateTrunc(ShVal0, DestTy);
823 if (ShVal0 != ShVal1)
824 Y = Builder.CreateTrunc(ShVal1, DestTy);
825 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
826 Function *F =
827 Intrinsic::getOrInsertDeclaration(Trunc.getModule(), IID, DestTy);
828 return CallInst::Create(F, {X, Y, NarrowShAmt});
829}
830
831/// Try to narrow the width of math or bitwise logic instructions by pulling a
832/// truncate ahead of binary operators.
833Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
834 Type *SrcTy = Trunc.getSrcTy();
835 Type *DestTy = Trunc.getType();
836 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
837 unsigned DestWidth = DestTy->getScalarSizeInBits();
838
839 if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
840 return nullptr;
841
842 BinaryOperator *BinOp;
843 if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
844 return nullptr;
845
846 Value *BinOp0 = BinOp->getOperand(0);
847 Value *BinOp1 = BinOp->getOperand(1);
848 switch (BinOp->getOpcode()) {
849 case Instruction::And:
850 case Instruction::Or:
851 case Instruction::Xor:
852 case Instruction::Add:
853 case Instruction::Sub:
854 case Instruction::Mul: {
855 Constant *C;
856 if (match(BinOp0, m_Constant(C))) {
857 // trunc (binop C, X) --> binop (trunc C', X)
858 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
859 Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
860 return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
861 }
862 if (match(BinOp1, m_Constant(C))) {
863 // trunc (binop X, C) --> binop (trunc X, C')
864 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
865 Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
866 return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
867 }
868 Value *X;
869 if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
870 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
871 Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
872 return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
873 }
874 if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
875 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
876 Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
877 return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
878 }
879 break;
880 }
881 case Instruction::LShr:
882 case Instruction::AShr: {
883 // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
884 Value *A;
885 Constant *C;
886 if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
887 unsigned MaxShiftAmt = SrcWidth - DestWidth;
888 // If the shift is small enough, all zero/sign bits created by the shift
889 // are removed by the trunc.
891 APInt(SrcWidth, MaxShiftAmt)))) {
892 auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
893 bool IsExact = OldShift->isExact();
894 if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
895 /*IsSigned*/ true, DL)) {
896 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
897 Value *Shift =
898 OldShift->getOpcode() == Instruction::AShr
899 ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
900 : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
901 return CastInst::CreateTruncOrBitCast(Shift, DestTy);
902 }
903 }
904 }
905 break;
906 }
907 default: break;
908 }
909
910 if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
911 return NarrowOr;
912
913 return nullptr;
914}
915
916/// Try to narrow the width of a splat shuffle. This could be generalized to any
917/// shuffle with a constant operand, but we limit the transform to avoid
918/// creating a shuffle type that targets may not be able to lower effectively.
920 InstCombiner::BuilderTy &Builder) {
921 auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
922 if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
923 all_equal(Shuf->getShuffleMask()) &&
924 ElementCount::isKnownGE(Shuf->getType()->getElementCount(),
925 cast<VectorType>(Shuf->getOperand(0)->getType())
926 ->getElementCount())) {
927 // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
928 // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
929 Type *NewTruncTy = Shuf->getOperand(0)->getType()->getWithNewType(
930 Trunc.getType()->getScalarType());
931 Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), NewTruncTy);
932 return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
933 }
934
935 return nullptr;
936}
937
938/// Try to narrow the width of an insert element. This could be generalized for
939/// any vector constant, but we limit the transform to insertion into undef to
940/// avoid potential backend problems from unsupported insertion widths. This
941/// could also be extended to handle the case of inserting a scalar constant
942/// into a vector variable.
944 InstCombiner::BuilderTy &Builder) {
945 Instruction::CastOps Opcode = Trunc.getOpcode();
946 assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
947 "Unexpected instruction for shrinking");
948
949 auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
950 if (!InsElt || !InsElt->hasOneUse())
951 return nullptr;
952
953 Type *DestTy = Trunc.getType();
954 Type *DestScalarTy = DestTy->getScalarType();
955 Value *VecOp = InsElt->getOperand(0);
956 Value *ScalarOp = InsElt->getOperand(1);
957 Value *Index = InsElt->getOperand(2);
958
959 if (match(VecOp, m_Undef())) {
960 // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
961 // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
962 UndefValue *NarrowUndef = UndefValue::get(DestTy);
963 Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
964 return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
965 }
966
967 return nullptr;
968}
969
971 if (Instruction *Result = commonCastTransforms(Trunc))
972 return Result;
973
974 Value *Src = Trunc.getOperand(0);
975 Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
976 unsigned DestWidth = DestTy->getScalarSizeInBits();
977 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
978
979 // Attempt to truncate the entire input expression tree to the destination
980 // type. Only do this if the dest type is a simple type, don't convert the
981 // expression tree to something weird like i93 unless the source is also
982 // strange.
983 if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
984 TypeEvaluationHelper::canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
985
986 // If this cast is a truncate, evaluting in a different type always
987 // eliminates the cast, so it is always a win.
989 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
990 " to avoid cast: "
991 << Trunc << '\n');
992 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
993 assert(Res->getType() == DestTy);
994 return replaceInstUsesWith(Trunc, Res);
995 }
996
997 // For integer types, check if we can shorten the entire input expression to
998 // DestWidth * 2, which won't allow removing the truncate, but reducing the
999 // width may enable further optimizations, e.g. allowing for larger
1000 // vectorization factors.
1001 if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
1002 if (DestWidth * 2 < SrcWidth) {
1003 auto *NewDestTy = DestITy->getExtendedType();
1004 if (shouldChangeType(SrcTy, NewDestTy) &&
1005 TypeEvaluationHelper::canEvaluateTruncated(Src, NewDestTy, *this,
1006 &Trunc)) {
1007 LLVM_DEBUG(
1008 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1009 " to reduce the width of operand of"
1010 << Trunc << '\n');
1011 Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
1012 return new TruncInst(Res, DestTy);
1013 }
1014 }
1015 }
1016
1017 // See if we can simplify any instructions used by the input whose sole
1018 // purpose is to compute bits we don't care about.
1020 return &Trunc;
1021
1022 if (DestWidth == 1) {
1023 Value *Zero = Constant::getNullValue(SrcTy);
1024
1025 Value *X;
1026 const APInt *C1;
1027 Constant *C2;
1028 if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
1029 m_ImmConstant(C2))))) {
1030 // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
1031 Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
1032 Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
1033 return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
1034 }
1035
1036 if (match(Src, m_Shr(m_Value(X), m_SpecificInt(SrcWidth - 1)))) {
1037 // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
1038 // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
1039 return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero);
1040 }
1041
1042 Constant *C;
1043 if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
1044 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
1045 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1046 Value *MaskC = Builder.CreateShl(One, C);
1047 Value *And = Builder.CreateAnd(X, MaskC);
1048 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1049 }
1051 m_Deferred(X))))) {
1052 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
1053 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1054 Value *MaskC = Builder.CreateShl(One, C);
1055 Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
1056 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1057 }
1058
1059 {
1060 const APInt *C;
1061 if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
1062 // trunc (C << X) to i1 --> X == 0, where C is odd
1063 return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
1064 }
1065 }
1066
1067 if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
1068 Value *X, *Y;
1069 if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
1070 return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
1071 }
1072 }
1073
1074 Value *A, *B;
1075 Constant *C;
1076 if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
1077 unsigned AWidth = A->getType()->getScalarSizeInBits();
1078 unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
1079 auto *OldSh = cast<Instruction>(Src);
1080 bool IsExact = OldSh->isExact();
1081
1082 // If the shift is small enough, all zero bits created by the shift are
1083 // removed by the trunc.
1085 APInt(SrcWidth, MaxShiftAmt)))) {
1086 auto GetNewShAmt = [&](unsigned Width) {
1087 Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
1088 Constant *Cmp =
1090 Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
1091 return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
1092 DL);
1093 };
1094
1095 // trunc (lshr (sext A), C) --> ashr A, C
1096 if (A->getType() == DestTy) {
1097 Constant *ShAmt = GetNewShAmt(DestWidth);
1098 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
1099 return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
1100 : BinaryOperator::CreateAShr(A, ShAmt);
1101 }
1102 // The types are mismatched, so create a cast after shifting:
1103 // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
1104 if (Src->hasOneUse()) {
1105 Constant *ShAmt = GetNewShAmt(AWidth);
1106 Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
1107 return CastInst::CreateIntegerCast(Shift, DestTy, true);
1108 }
1109 }
1110 // TODO: Mask high bits with 'and'.
1111 }
1112
1113 if (Instruction *I = narrowBinOp(Trunc))
1114 return I;
1115
1116 if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
1117 return I;
1118
1119 if (Instruction *I = shrinkInsertElt(Trunc, Builder))
1120 return I;
1121
1122 if (Src->hasOneUse() &&
1123 (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
1124 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
1125 // dest type is native and cst < dest size.
1126 if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
1127 !match(A, m_Shr(m_Value(), m_Constant()))) {
1128 // Skip shifts of shift by constants. It undoes a combine in
1129 // FoldShiftByConstant and is the extend in reg pattern.
1130 APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
1131 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
1132 Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
1133 return BinaryOperator::Create(Instruction::Shl, NewTrunc,
1134 ConstantExpr::getTrunc(C, DestTy));
1135 }
1136 }
1137 }
1138
1139 if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
1140 return I;
1141
1142 if (Instruction *I = foldVecExtTruncToExtElt(Trunc, *this))
1143 return I;
1144
1145 // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
1147 m_Value(B))))) {
1148 unsigned AWidth = A->getType()->getScalarSizeInBits();
1149 if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
1150 Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
1151 Value *NarrowCtlz =
1152 Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
1153 return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
1154 }
1155 }
1156
1157 if (match(Src, m_VScale())) {
1158 if (Trunc.getFunction() &&
1159 Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1160 Attribute Attr =
1161 Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
1162 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1163 if (Log2_32(*MaxVScale) < DestWidth)
1164 return replaceInstUsesWith(Trunc, Builder.CreateVScale(DestTy));
1165 }
1166 }
1167
1168 if (DestWidth == 1 &&
1169 (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) &&
1170 isKnownNonZero(Src, SQ.getWithInstruction(&Trunc)))
1171 return replaceInstUsesWith(Trunc, ConstantInt::getTrue(DestTy));
1172
1173 bool Changed = false;
1174 if (!Trunc.hasNoSignedWrap() &&
1175 ComputeMaxSignificantBits(Src, &Trunc) <= DestWidth) {
1176 Trunc.setHasNoSignedWrap(true);
1177 Changed = true;
1178 }
1179 if (!Trunc.hasNoUnsignedWrap() &&
1180 MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
1181 &Trunc)) {
1182 Trunc.setHasNoUnsignedWrap(true);
1183 Changed = true;
1184 }
1185
1186 const APInt *C1;
1187 Value *V1;
1188 // OP = { lshr, ashr }
1189 // trunc ( OP i8 C1, V1) to i1 -> icmp eq V1, log_2(C1) iff C1 is power of 2
1190 if (DestWidth == 1 && match(Src, m_Shr(m_Power2(C1), m_Value(V1)))) {
1191 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1192 return new ICmpInst(ICmpInst::ICMP_EQ, V1, Right);
1193 }
1194
1195 // OP = { lshr, ashr }
1196 // trunc ( OP i8 C1, V1) to i1 -> icmp ult V1, log_2(C1 + 1) iff (C1 + 1) is
1197 // power of 2
1198 if (DestWidth == 1 && match(Src, m_Shr(m_LowBitMask(C1), m_Value(V1)))) {
1199 Value *Right = ConstantInt::get(V1->getType(), C1->countr_one());
1200 return new ICmpInst(ICmpInst::ICMP_ULT, V1, Right);
1201 }
1202
1203 // OP = { lshr, ashr }
1204 // trunc ( OP i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is
1205 // negative power of 2
1206 if (DestWidth == 1 && match(Src, m_Shr(m_NegatedPower2(C1), m_Value(V1)))) {
1207 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1208 return new ICmpInst(ICmpInst::ICMP_UGE, V1, Right);
1209 }
1210
1211 return Changed ? &Trunc : nullptr;
1212}
1213
1214Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
1215 ZExtInst &Zext) {
1216 // If we are just checking for a icmp eq of a single bit and zext'ing it
1217 // to an integer, then shift the bit to the appropriate place and then
1218 // cast to integer to avoid the comparison.
1219
1220 // FIXME: This set of transforms does not check for extra uses and/or creates
1221 // an extra instruction (an optional final cast is not included
1222 // in the transform comments). We may also want to favor icmp over
1223 // shifts in cases of equal instructions because icmp has better
1224 // analysis in general (invert the transform).
1225
1226 const APInt *Op1CV;
1227 if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
1228
1229 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
1230 if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
1231 Value *In = Cmp->getOperand(0);
1232 Value *Sh = ConstantInt::get(In->getType(),
1233 In->getType()->getScalarSizeInBits() - 1);
1234 In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
1235 if (In->getType() != Zext.getType())
1236 In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
1237
1238 return replaceInstUsesWith(Zext, In);
1239 }
1240
1241 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
1242 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1243 // zext (X != 0) to i32 --> X iff X has only the low bit set.
1244 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1245
1246 if (Op1CV->isZero() && Cmp->isEquality()) {
1247 // Exactly 1 possible 1? But not the high-bit because that is
1248 // canonicalized to this form.
1249 KnownBits Known = computeKnownBits(Cmp->getOperand(0), &Zext);
1250 APInt KnownZeroMask(~Known.Zero);
1251 uint32_t ShAmt = KnownZeroMask.logBase2();
1252 bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
1253 (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
1254 if (IsExpectShAmt &&
1255 (Cmp->getOperand(0)->getType() == Zext.getType() ||
1256 Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
1257 Value *In = Cmp->getOperand(0);
1258 if (ShAmt) {
1259 // Perform a logical shr by shiftamt.
1260 // Insert the shift to put the result in the low bit.
1261 In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
1262 In->getName() + ".lobit");
1263 }
1264
1265 // Toggle the low bit for "X == 0".
1266 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1267 In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));
1268
1269 if (Zext.getType() == In->getType())
1270 return replaceInstUsesWith(Zext, In);
1271
1272 Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
1273 return replaceInstUsesWith(Zext, IntCast);
1274 }
1275 }
1276 }
1277
1278 if (Cmp->isEquality()) {
1279 // Test if a bit is clear/set using a shifted-one mask:
1280 // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1281 // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1282 Value *X, *ShAmt;
1283 if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
1284 match(Cmp->getOperand(0),
1285 m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
1286 auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
1287 Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
1288 if (Zext.getType() == And->getType() ||
1289 Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
1290 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1291 X = Builder.CreateNot(X);
1292 Value *Lshr = Builder.CreateLShr(X, ShAmt);
1293 Value *And1 =
1294 Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
1295 return replaceInstUsesWith(
1296 Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
1297 }
1298 }
1299 }
1300
1301 return nullptr;
1302}
1303
1304/// Determine if the specified value can be computed in the specified wider type
1305/// and produce the same low bits. If not, return false.
1306///
1307/// If this function returns true, it can also return a non-zero number of bits
1308/// (in BitsToClear) which indicates that the value it computes is correct for
1309/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1310/// out. For example, to promote something like:
1311///
1312/// %B = trunc i64 %A to i32
1313/// %C = lshr i32 %B, 8
1314/// %E = zext i32 %C to i64
1315///
1316/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1317/// set to 8 to indicate that the promoted value needs to have bits 24-31
1318/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1319/// clear the top bits anyway, doing this has no extra cost.
1320///
1321/// This function works on both vectors and scalars.
1322bool TypeEvaluationHelper::canEvaluateZExtd(Value *V, Type *Ty,
1323 unsigned &BitsToClear,
1324 InstCombinerImpl &IC,
1325 Instruction *CxtI) {
1326 TypeEvaluationHelper TYH;
1327 return TYH.canEvaluateZExtdImpl(V, Ty, BitsToClear, IC, CxtI);
1328}
1329bool TypeEvaluationHelper::canEvaluateZExtdImpl(Value *V, Type *Ty,
1330 unsigned &BitsToClear,
1331 InstCombinerImpl &IC,
1332 Instruction *CxtI) {
1333 BitsToClear = 0;
1334 if (canAlwaysEvaluateInType(V, Ty))
1335 return true;
1336 // We stick to the one-user limit for the ZExt transform due to the fact
1337 // that this predicate returns two values: predicate result and BitsToClear.
1338 if (canNotEvaluateInType(V, Ty))
1339 return false;
1340
1341 auto *I = cast<Instruction>(V);
1342 unsigned Tmp;
1343 switch (I->getOpcode()) {
1344 case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1345 case Instruction::SExt: // zext(sext(x)) -> sext(x).
1346 case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1347 return true;
1348 case Instruction::And:
1349 case Instruction::Or:
1350 case Instruction::Xor:
1351 case Instruction::Add:
1352 case Instruction::Sub:
1353 case Instruction::Mul:
1354 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
1355 !canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI))
1356 return false;
1357 // These can all be promoted if neither operand has 'bits to clear'.
1358 if (BitsToClear == 0 && Tmp == 0)
1359 return true;
1360
1361 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1362 // other side, BitsToClear is ok.
1363 if (Tmp == 0 && I->isBitwiseLogicOp()) {
1364 // We use MaskedValueIsZero here for generality, but the case we care
1365 // about the most is constant RHS.
1366 unsigned VSize = V->getType()->getScalarSizeInBits();
1367 if (IC.MaskedValueIsZero(I->getOperand(1),
1368 APInt::getHighBitsSet(VSize, BitsToClear),
1369 CxtI)) {
1370 // If this is an And instruction and all of the BitsToClear are
1371 // known to be zero we can reset BitsToClear.
1372 if (I->getOpcode() == Instruction::And)
1373 BitsToClear = 0;
1374 return true;
1375 }
1376 }
1377
1378 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1379 return false;
1380
1381 case Instruction::Shl: {
1382 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1383 // upper bits we can reduce BitsToClear by the shift amount.
1384 uint64_t ShiftAmt;
1385 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1386 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1387 return false;
1388 BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1389 return true;
1390 }
1391 return false;
1392 }
1393 case Instruction::LShr: {
1394 // We can promote lshr(x, cst) if we can promote x. This requires the
1395 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1396 uint64_t ShiftAmt;
1397 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1398 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1399 return false;
1400 BitsToClear += ShiftAmt;
1401 if (BitsToClear > V->getType()->getScalarSizeInBits())
1402 BitsToClear = V->getType()->getScalarSizeInBits();
1403 return true;
1404 }
1405 // Cannot promote variable LSHR.
1406 return false;
1407 }
1408 case Instruction::Select:
1409 if (!canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
1410 !canEvaluateZExtdImpl(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
1411 // TODO: If important, we could handle the case when the BitsToClear are
1412 // known zero in the disagreeing side.
1413 Tmp != BitsToClear)
1414 return false;
1415 return true;
1416
1417 case Instruction::PHI: {
1418 // We can change a phi if we can change all operands. Note that we never
1419 // get into trouble with cyclic PHIs here because we only consider
1420 // instructions with a single use.
1421 PHINode *PN = cast<PHINode>(I);
1422 if (!canEvaluateZExtdImpl(PN->getIncomingValue(0), Ty, BitsToClear, IC,
1423 CxtI))
1424 return false;
1425 for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1426 if (!canEvaluateZExtdImpl(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
1427 // TODO: If important, we could handle the case when the BitsToClear
1428 // are known zero in the disagreeing input.
1429 Tmp != BitsToClear)
1430 return false;
1431 return true;
1432 }
1433 case Instruction::Call:
1434 // llvm.vscale() can always be executed in larger type, because the
1435 // value is automatically zero-extended.
1437 if (II->getIntrinsicID() == Intrinsic::vscale)
1438 return true;
1439 return false;
1440 default:
1441 // TODO: Can handle more cases here.
1442 return false;
1443 }
1444}
1445
1447 // If this zero extend is only used by a truncate, let the truncate be
1448 // eliminated before we try to optimize this zext.
1449 if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
1450 !isa<Constant>(Zext.getOperand(0)))
1451 return nullptr;
1452
1453 // If one of the common conversion will work, do it.
1454 if (Instruction *Result = commonCastTransforms(Zext))
1455 return Result;
1456
1457 Value *Src = Zext.getOperand(0);
1458 Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
1459
1460 // zext nneg bool x -> 0
1461 if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
1463
1464 // Try to extend the entire expression tree to the wide destination type.
1465 unsigned BitsToClear;
1466 if (shouldChangeType(SrcTy, DestTy) &&
1467 TypeEvaluationHelper::canEvaluateZExtd(Src, DestTy, BitsToClear, *this,
1468 &Zext)) {
1469 assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1470 "Can't clear more bits than in SrcTy");
1471
1472 // Okay, we can transform this! Insert the new expression now.
1473 LLVM_DEBUG(
1474 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1475 " to avoid zero extend: "
1476 << Zext << '\n');
1477 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1478 assert(Res->getType() == DestTy);
1479
1480 // Preserve debug values referring to Src if the zext is its last use.
1481 if (auto *SrcOp = dyn_cast<Instruction>(Src))
1482 if (SrcOp->hasOneUse())
1483 replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
1484
1485 uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1486 uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1487
1488 // If the high bits are already filled with zeros, just replace this
1489 // cast with the result.
1491 Res, APInt::getHighBitsSet(DestBitSize, DestBitSize - SrcBitsKept),
1492 &Zext))
1493 return replaceInstUsesWith(Zext, Res);
1494
1495 // We need to emit an AND to clear the high bits.
1496 Constant *C = ConstantInt::get(Res->getType(),
1497 APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
1498 return BinaryOperator::CreateAnd(Res, C);
1499 }
1500
1501 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1502 // types and if the sizes are just right we can convert this into a logical
1503 // 'and' which will be much cheaper than the pair of casts.
1504 if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
1505 // TODO: Subsume this into EvaluateInDifferentType.
1506
1507 // Get the sizes of the types involved. We know that the intermediate type
1508 // will be smaller than A or C, but don't know the relation between A and C.
1509 Value *A = CSrc->getOperand(0);
1510 unsigned SrcSize = A->getType()->getScalarSizeInBits();
1511 unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1512 unsigned DstSize = DestTy->getScalarSizeInBits();
1513 // If we're actually extending zero bits, then if
1514 // SrcSize < DstSize: zext(a & mask)
1515 // SrcSize == DstSize: a & mask
1516 // SrcSize > DstSize: trunc(a) & mask
1517 if (SrcSize < DstSize) {
1518 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1519 Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
1520 Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
1521 return new ZExtInst(And, DestTy);
1522 }
1523
1524 if (SrcSize == DstSize) {
1525 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1526 return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
1527 AndValue));
1528 }
1529 if (SrcSize > DstSize) {
1530 Value *Trunc = Builder.CreateTrunc(A, DestTy);
1531 APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
1532 return BinaryOperator::CreateAnd(Trunc,
1533 ConstantInt::get(Trunc->getType(),
1534 AndValue));
1535 }
1536 }
1537
1538 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1539 return transformZExtICmp(Cmp, Zext);
1540
1541 // zext(trunc(X) & C) -> (X & zext(C)).
1542 Constant *C;
1543 Value *X;
1544 if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
1545 X->getType() == DestTy)
1546 return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));
1547
1548 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1549 Value *And;
1550 if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
1552 X->getType() == DestTy) {
1553 Value *ZC = Builder.CreateZExt(C, DestTy);
1554 return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
1555 }
1556
1557 // If we are truncating, masking, and then zexting back to the original type,
1558 // that's just a mask. This is not handled by canEvaluateZextd if the
1559 // intermediate values have extra uses. This could be generalized further for
1560 // a non-constant mask operand.
1561 // zext (and (trunc X), C) --> and X, (zext C)
1562 if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
1563 X->getType() == DestTy) {
1564 Value *ZextC = Builder.CreateZExt(C, DestTy);
1565 return BinaryOperator::CreateAnd(X, ZextC);
1566 }
1567
1568 if (match(Src, m_VScale())) {
1569 if (Zext.getFunction() &&
1570 Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1571 Attribute Attr =
1572 Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1573 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1574 unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1575 if (Log2_32(*MaxVScale) < TypeWidth)
1576 return replaceInstUsesWith(Zext, Builder.CreateVScale(DestTy));
1577 }
1578 }
1579 }
1580
1581 if (!Zext.hasNonNeg()) {
1582 // If this zero extend is only used by a shift, add nneg flag.
1583 if (Zext.hasOneUse() &&
1584 SrcTy->getScalarSizeInBits() >
1585 Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
1586 match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
1587 Zext.setNonNeg();
1588 return &Zext;
1589 }
1590
1591 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
1592 Zext.setNonNeg();
1593 return &Zext;
1594 }
1595 }
1596
1597 return nullptr;
1598}
1599
1600/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1601Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
1602 SExtInst &Sext) {
1603 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1604 ICmpInst::Predicate Pred = Cmp->getPredicate();
1605
1606 // Don't bother if Op1 isn't of vector or integer type.
1607 if (!Op1->getType()->isIntOrIntVectorTy())
1608 return nullptr;
1609
1610 if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
1611 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1612 Value *Sh = ConstantInt::get(Op0->getType(),
1613 Op0->getType()->getScalarSizeInBits() - 1);
1614 Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1615 if (In->getType() != Sext.getType())
1616 In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
1617
1618 return replaceInstUsesWith(Sext, In);
1619 }
1620
1621 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1622 // If we know that only one bit of the LHS of the icmp can be set and we
1623 // have an equality comparison with zero or a power of 2, we can transform
1624 // the icmp and sext into bitwise/integer operations.
1625 if (Cmp->hasOneUse() &&
1626 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1627 KnownBits Known = computeKnownBits(Op0, &Sext);
1628
1629 APInt KnownZeroMask(~Known.Zero);
1630 if (KnownZeroMask.isPowerOf2()) {
1631 Value *In = Cmp->getOperand(0);
1632
1633 // If the icmp tests for a known zero bit we can constant fold it.
1634 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1635 Value *V = Pred == ICmpInst::ICMP_NE ?
1637 ConstantInt::getNullValue(Sext.getType());
1638 return replaceInstUsesWith(Sext, V);
1639 }
1640
1641 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1642 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1643 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1644 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1645 // Perform a right shift to place the desired bit in the LSB.
1646 if (ShiftAmt)
1647 In = Builder.CreateLShr(In,
1648 ConstantInt::get(In->getType(), ShiftAmt));
1649
1650 // At this point "In" is either 1 or 0. Subtract 1 to turn
1651 // {1, 0} -> {0, -1}.
1652 In = Builder.CreateAdd(In,
1653 ConstantInt::getAllOnesValue(In->getType()),
1654 "sext");
1655 } else {
1656 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1657 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1658 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1659 // Perform a left shift to place the desired bit in the MSB.
1660 if (ShiftAmt)
1661 In = Builder.CreateShl(In,
1662 ConstantInt::get(In->getType(), ShiftAmt));
1663
1664 // Distribute the bit over the whole bit width.
1665 In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1666 KnownZeroMask.getBitWidth() - 1), "sext");
1667 }
1668
1669 if (Sext.getType() == In->getType())
1670 return replaceInstUsesWith(Sext, In);
1671 return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
1672 }
1673 }
1674 }
1675
1676 return nullptr;
1677}
1678
1679/// Return true if we can take the specified value and return it as type Ty
1680/// without inserting any new casts and without changing the value of the common
1681/// low bits. This is used by code that tries to promote integer operations to
1682/// a wider types will allow us to eliminate the extension.
1683///
1684/// This function works on both vectors and scalars.
1685///
1686bool TypeEvaluationHelper::canEvaluateSExtd(Value *V, Type *Ty) {
1687 TypeEvaluationHelper TYH;
1688 return TYH.canEvaluateSExtdImpl(V, Ty) && TYH.allPendingVisited();
1689}
1690
1691bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
1692 return canEvaluate(V, Ty, [this](Value *V, Type *Ty) {
1693 return canEvaluateSExtdPred(V, Ty);
1694 });
1695}
1696
1697bool TypeEvaluationHelper::canEvaluateSExtdPred(Value *V, Type *Ty) {
1698 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1699 "Can't sign extend type to a smaller type");
1700
1701 auto *I = cast<Instruction>(V);
1702 switch (I->getOpcode()) {
1703 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1704 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1705 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1706 return true;
1707 case Instruction::And:
1708 case Instruction::Or:
1709 case Instruction::Xor:
1710 case Instruction::Add:
1711 case Instruction::Sub:
1712 case Instruction::Mul:
1713 // These operators can all arbitrarily be extended if their inputs can.
1714 return canEvaluateSExtdImpl(I->getOperand(0), Ty) &&
1715 canEvaluateSExtdImpl(I->getOperand(1), Ty);
1716
1717 // case Instruction::Shl: TODO
1718 // case Instruction::LShr: TODO
1719
1720 case Instruction::Select:
1721 return canEvaluateSExtdImpl(I->getOperand(1), Ty) &&
1722 canEvaluateSExtdImpl(I->getOperand(2), Ty);
1723
1724 case Instruction::PHI: {
1725 // We can change a phi if we can change all operands. Note that we never
1726 // get into trouble with cyclic PHIs here because canEvaluate handles use
1727 // chain loops.
1728 PHINode *PN = cast<PHINode>(I);
1729 for (Value *IncValue : PN->incoming_values())
1730 if (!canEvaluateSExtdImpl(IncValue, Ty))
1731 return false;
1732 return true;
1733 }
1734 default:
1735 // TODO: Can handle more cases here.
1736 break;
1737 }
1738
1739 return false;
1740}
1741
1743 // If this sign extend is only used by a truncate, let the truncate be
1744 // eliminated before we try to optimize this sext.
1745 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1746 return nullptr;
1747
1748 if (Instruction *I = commonCastTransforms(Sext))
1749 return I;
1750
1751 Value *Src = Sext.getOperand(0);
1752 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1753 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1754 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1755
1756 // If the value being extended is zero or positive, use a zext instead.
1757 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1758 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1759 CI->setNonNeg(true);
1760 return CI;
1761 }
1762
1763 // Try to extend the entire expression tree to the wide destination type.
1764 bool ShouldExtendExpression = true;
1765 Value *TruncSrc = nullptr;
1766 // It is not desirable to extend expression in the trunc + sext pattern when
1767 // destination type is narrower than original (pre-trunc) type.
1768 if (match(Src, m_Trunc(m_Value(TruncSrc))))
1769 if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
1770 ShouldExtendExpression = false;
1771 if (ShouldExtendExpression && shouldChangeType(SrcTy, DestTy) &&
1772 TypeEvaluationHelper::canEvaluateSExtd(Src, DestTy)) {
1773 // Okay, we can transform this! Insert the new expression now.
1774 LLVM_DEBUG(
1775 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1776 " to avoid sign extend: "
1777 << Sext << '\n');
1778 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1779 assert(Res->getType() == DestTy);
1780
1781 // If the high bits are already filled with sign bit, just replace this
1782 // cast with the result.
1783 if (ComputeNumSignBits(Res, &Sext) > DestBitSize - SrcBitSize)
1784 return replaceInstUsesWith(Sext, Res);
1785
1786 // We need to emit a shl + ashr to do the sign extend.
1787 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1788 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1789 ShAmt);
1790 }
1791
1792 Value *X = TruncSrc;
1793 if (X) {
1794 // If the input has more sign bits than bits truncated, then convert
1795 // directly to final type.
1796 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1797 bool HasNSW = cast<TruncInst>(Src)->hasNoSignedWrap();
1798 if (HasNSW || (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)) {
1799 auto *Res = CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1800 if (auto *ResTrunc = dyn_cast<TruncInst>(Res); ResTrunc && HasNSW)
1801 ResTrunc->setHasNoSignedWrap(true);
1802 return Res;
1803 }
1804
1805 // If input is a trunc from the destination type, then convert into shifts.
1806 if (Src->hasOneUse() && X->getType() == DestTy) {
1807 // sext (trunc X) --> ashr (shl X, C), C
1808 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1809 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1810 }
1811
1812 // If we are replacing shifted-in high zero bits with sign bits, convert
1813 // the logic shift to arithmetic shift and eliminate the cast to
1814 // intermediate type:
1815 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1816 Value *Y;
1817 if (Src->hasOneUse() &&
1819 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1820 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1821 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1822 }
1823 }
1824
1825 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1826 return transformSExtICmp(Cmp, Sext);
1827
1828 // If the input is a shl/ashr pair of a same constant, then this is a sign
1829 // extension from a smaller value. If we could trust arbitrary bitwidth
1830 // integers, we could turn this into a truncate to the smaller bit and then
1831 // use a sext for the whole extension. Since we don't, look deeper and check
1832 // for a truncate. If the source and dest are the same type, eliminate the
1833 // trunc and extend and just do shifts. For example, turn:
1834 // %a = trunc i32 %i to i8
1835 // %b = shl i8 %a, C
1836 // %c = ashr i8 %b, C
1837 // %d = sext i8 %c to i32
1838 // into:
1839 // %a = shl i32 %i, 32-(8-C)
1840 // %d = ashr i32 %a, 32-(8-C)
1841 Value *A = nullptr;
1842 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1843 Constant *BA = nullptr, *CA = nullptr;
1844 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1845 m_ImmConstant(CA))) &&
1846 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1847 Constant *WideCurrShAmt =
1848 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1849 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1850 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1851 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1852 Constant *NewShAmt = ConstantExpr::getSub(
1853 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1854 NumLowbitsLeft);
1855 NewShAmt =
1857 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1858 return BinaryOperator::CreateAShr(A, NewShAmt);
1859 }
1860
1861 // Splatting a bit of constant-index across a value:
1862 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1863 // If the dest type is different, use a cast (adjust use check).
1864 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1865 m_SpecificInt(SrcBitSize - 1))))) {
1866 Type *XTy = X->getType();
1867 unsigned XBitSize = XTy->getScalarSizeInBits();
1868 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1869 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1870 if (XTy == DestTy)
1871 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1872 AshrAmtC);
1873 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1874 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1875 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1876 }
1877 }
1878
1879 if (match(Src, m_VScale())) {
1880 if (Sext.getFunction() &&
1881 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1882 Attribute Attr =
1883 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1884 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1885 if (Log2_32(*MaxVScale) < (SrcBitSize - 1))
1886 return replaceInstUsesWith(Sext, Builder.CreateVScale(DestTy));
1887 }
1888 }
1889
1890 return nullptr;
1891}
1892
1893/// Return a Constant* for the specified floating-point constant if it fits
1894/// in the specified FP type without changing its value.
1895static bool fitsInFPType(APFloat F, const fltSemantics &Sem) {
1896 bool losesInfo;
1897 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1898 return !losesInfo;
1899}
1900
1902 bool PreferBFloat) {
1903 // See if the value can be truncated to bfloat and then reextended.
1904 if (PreferBFloat && fitsInFPType(F, APFloat::BFloat()))
1905 return Type::getBFloatTy(Ctx);
1906 // See if the value can be truncated to half and then reextended.
1907 if (!PreferBFloat && fitsInFPType(F, APFloat::IEEEhalf()))
1908 return Type::getHalfTy(Ctx);
1909 // See if the value can be truncated to float and then reextended.
1911 return Type::getFloatTy(Ctx);
1912 if (&F.getSemantics() == &APFloat::IEEEdouble())
1913 return nullptr; // Won't shrink.
1914 // See if the value can be truncated to double and then reextended.
1916 return Type::getDoubleTy(Ctx);
1917 // Don't try to shrink to various long double types.
1918 return nullptr;
1919}
1920
1921static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
1922 Type *Ty = CFP->getType();
1923 if (Ty->getScalarType()->isPPC_FP128Ty())
1924 return nullptr; // No constant folding of this.
1925
1926 Type *ShrinkTy =
1927 shrinkFPConstant(CFP->getContext(), CFP->getValueAPF(), PreferBFloat);
1928 if (ShrinkTy)
1929 if (auto *VecTy = dyn_cast<VectorType>(Ty))
1930 ShrinkTy = VectorType::get(ShrinkTy, VecTy);
1931
1932 return ShrinkTy;
1933}
1934
1935// Determine if this is a vector of ConstantFPs and if so, return the minimal
1936// type we can safely truncate all elements to.
1937static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
1938 auto *CV = dyn_cast<Constant>(V);
1939 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1940 if (!CV || !CVVTy)
1941 return nullptr;
1942
1943 Type *MinType = nullptr;
1944
1945 unsigned NumElts = CVVTy->getNumElements();
1946
1947 // For fixed-width vectors we find the minimal type by looking
1948 // through the constant values of the vector.
1949 for (unsigned i = 0; i != NumElts; ++i) {
1950 if (isa<UndefValue>(CV->getAggregateElement(i)))
1951 continue;
1952
1953 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1954 if (!CFP)
1955 return nullptr;
1956
1957 Type *T = shrinkFPConstant(CFP, PreferBFloat);
1958 if (!T)
1959 return nullptr;
1960
1961 // If we haven't found a type yet or this type has a larger mantissa than
1962 // our previous type, this is our new minimal type.
1963 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1964 MinType = T;
1965 }
1966
1967 // Make a vector type from the minimal type.
1968 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
1969}
1970
1971/// Find the minimum FP type we can safely truncate to.
1972static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
1973 if (auto *FPExt = dyn_cast<FPExtInst>(V))
1974 return FPExt->getOperand(0)->getType();
1975
1976 // If this value is a constant, return the constant in the smallest FP type
1977 // that can accurately represent it. This allows us to turn
1978 // (float)((double)X+2.0) into x+2.0f.
1979 if (auto *CFP = dyn_cast<ConstantFP>(V))
1980 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
1981 return T;
1982
1983 // Try to shrink scalable and fixed splat vectors.
1984 if (auto *FPC = dyn_cast<Constant>(V))
1985 if (auto *VTy = dyn_cast<VectorType>(V->getType()))
1986 if (auto *Splat = dyn_cast_or_null<ConstantFP>(FPC->getSplatValue()))
1987 if (Type *T = shrinkFPConstant(Splat, PreferBFloat))
1988 return VectorType::get(T, VTy);
1989
1990 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1991 // vectors
1992 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
1993 return T;
1994
1995 return V->getType();
1996}
1997
1998/// Return true if the cast from integer to FP can be proven to be exact for all
1999/// possible inputs (the conversion does not lose any precision).
2001 CastInst::CastOps Opcode = I.getOpcode();
2002 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
2003 "Unexpected cast");
2004 Value *Src = I.getOperand(0);
2005 Type *SrcTy = Src->getType();
2006 Type *FPTy = I.getType();
2007 bool IsSigned = Opcode == Instruction::SIToFP;
2008 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
2009
2010 // Easy case - if the source integer type has less bits than the FP mantissa,
2011 // then the cast must be exact.
2012 int DestNumSigBits = FPTy->getFPMantissaWidth();
2013 if (SrcSize <= DestNumSigBits)
2014 return true;
2015
2016 // Cast from FP to integer and back to FP is independent of the intermediate
2017 // integer width because of poison on overflow.
2018 Value *F;
2019 if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
2020 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
2021 // potential rounding of negative FP input values.
2022 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
2023 if (!IsSigned && match(Src, m_FPToSI(m_Value())))
2024 SrcNumSigBits++;
2025
2026 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
2027 // significant bits than the destination (and make sure neither type is
2028 // weird -- ppc_fp128).
2029 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
2030 SrcNumSigBits <= DestNumSigBits)
2031 return true;
2032 }
2033
2034 // TODO:
2035 // Try harder to find if the source integer type has less significant bits.
2036 // For example, compute number of sign bits.
2037 KnownBits SrcKnown = IC.computeKnownBits(Src, &I);
2038 int SigBits = (int)SrcTy->getScalarSizeInBits() -
2039 SrcKnown.countMinLeadingZeros() -
2040 SrcKnown.countMinTrailingZeros();
2041 if (SigBits <= DestNumSigBits)
2042 return true;
2043
2044 return false;
2045}
2046
2049 return I;
2050
2051 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
2052 // simplify this expression to avoid one or more of the trunc/extend
2053 // operations if we can do so without changing the numerical results.
2054 //
2055 // The exact manner in which the widths of the operands interact to limit
2056 // what we can and cannot do safely varies from operation to operation, and
2057 // is explained below in the various case statements.
2058 Type *Ty = FPT.getType();
2059 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
2060 if (BO && BO->hasOneUse()) {
2061 bool PreferBFloat = Ty->getScalarType()->isBFloatTy();
2062 Type *LHSMinType = getMinimumFPType(BO->getOperand(0), PreferBFloat);
2063 Type *RHSMinType = getMinimumFPType(BO->getOperand(1), PreferBFloat);
2064 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
2065 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
2066 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
2067 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
2068 unsigned DstWidth = Ty->getFPMantissaWidth();
2069 switch (BO->getOpcode()) {
2070 default: break;
2071 case Instruction::FAdd:
2072 case Instruction::FSub:
2073 // For addition and subtraction, the infinitely precise result can
2074 // essentially be arbitrarily wide; proving that double rounding
2075 // will not occur because the result of OpI is exact (as we will for
2076 // FMul, for example) is hopeless. However, we *can* nonetheless
2077 // frequently know that double rounding cannot occur (or that it is
2078 // innocuous) by taking advantage of the specific structure of
2079 // infinitely-precise results that admit double rounding.
2080 //
2081 // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
2082 // to represent both sources, we can guarantee that the double
2083 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
2084 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
2085 // for proof of this fact).
2086 //
2087 // Note: Figueroa does not consider the case where DstFormat !=
2088 // SrcFormat. It's possible (likely even!) that this analysis
2089 // could be tightened for those cases, but they are rare (the main
2090 // case of interest here is (float)((double)float + float)).
2091 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
2092 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2093 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2094 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
2095 RI->copyFastMathFlags(BO);
2096 return RI;
2097 }
2098 break;
2099 case Instruction::FMul:
2100 // For multiplication, the infinitely precise result has at most
2101 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
2102 // that such a value can be exactly represented, then no double
2103 // rounding can possibly occur; we can safely perform the operation
2104 // in the destination format if it can represent both sources.
2105 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
2106 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2107 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2108 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
2109 }
2110 break;
2111 case Instruction::FDiv:
2112 // For division, we use again use the bound from Figueroa's
2113 // dissertation. I am entirely certain that this bound can be
2114 // tightened in the unbalanced operand case by an analysis based on
2115 // the diophantine rational approximation bound, but the well-known
2116 // condition used here is a good conservative first pass.
2117 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
2118 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
2119 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2120 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2121 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
2122 }
2123 break;
2124 case Instruction::FRem: {
2125 // Remainder is straightforward. Remainder is always exact, so the
2126 // type of OpI doesn't enter into things at all. We simply evaluate
2127 // in whichever source type is larger, then convert to the
2128 // destination type.
2129 if (SrcWidth == OpWidth)
2130 break;
2131 Value *LHS, *RHS;
2132 if (LHSWidth == SrcWidth) {
2133 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
2134 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
2135 } else {
2136 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
2137 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
2138 }
2139
2140 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
2141 return CastInst::CreateFPCast(ExactResult, Ty);
2142 }
2143 }
2144 }
2145
2146 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
2147 Value *X;
2149 if (Op && Op->hasOneUse()) {
2150 FastMathFlags FMF = FPT.getFastMathFlags();
2151 if (auto *FPMO = dyn_cast<FPMathOperator>(Op))
2152 FMF &= FPMO->getFastMathFlags();
2153
2154 if (match(Op, m_FNeg(m_Value(X)))) {
2155 Value *InnerTrunc = Builder.CreateFPTruncFMF(X, Ty, FMF);
2156 Value *Neg = Builder.CreateFNegFMF(InnerTrunc, FMF);
2157 return replaceInstUsesWith(FPT, Neg);
2158 }
2159
2160 // If we are truncating a select that has an extended operand, we can
2161 // narrow the other operand and do the select as a narrow op.
2162 Value *Cond, *X, *Y;
2164 X->getType() == Ty) {
2165 // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
2166 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2167 Value *Sel =
2168 Builder.CreateSelectFMF(Cond, X, NarrowY, FMF, "narrow.sel", Op);
2169 return replaceInstUsesWith(FPT, Sel);
2170 }
2172 X->getType() == Ty) {
2173 // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
2174 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2175 Value *Sel =
2176 Builder.CreateSelectFMF(Cond, NarrowY, X, FMF, "narrow.sel", Op);
2177 return replaceInstUsesWith(FPT, Sel);
2178 }
2179 }
2180
2181 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
2182 switch (II->getIntrinsicID()) {
2183 default: break;
2184 case Intrinsic::ceil:
2185 case Intrinsic::fabs:
2186 case Intrinsic::floor:
2187 case Intrinsic::nearbyint:
2188 case Intrinsic::rint:
2189 case Intrinsic::round:
2190 case Intrinsic::roundeven:
2191 case Intrinsic::trunc: {
2192 Value *Src = II->getArgOperand(0);
2193 if (!Src->hasOneUse())
2194 break;
2195
2196 // Except for fabs, this transformation requires the input of the unary FP
2197 // operation to be itself an fpext from the type to which we're
2198 // truncating.
2199 if (II->getIntrinsicID() != Intrinsic::fabs) {
2200 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
2201 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
2202 break;
2203 }
2204
2205 // Do unary FP operation on smaller type.
2206 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
2207 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
2209 FPT.getModule(), II->getIntrinsicID(), Ty);
2211 II->getOperandBundlesAsDefs(OpBundles);
2212 CallInst *NewCI =
2213 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
2214 // A normal value may be converted to an infinity. It means that we cannot
2215 // propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
2216 NewCI->copyFastMathFlags(&FPT);
2217 return NewCI;
2218 }
2219 }
2220 }
2221
2222 if (Instruction *I = shrinkInsertElt(FPT, Builder))
2223 return I;
2224
2225 Value *Src = FPT.getOperand(0);
2226 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2227 auto *FPCast = cast<CastInst>(Src);
2228 if (isKnownExactCastIntToFP(*FPCast, *this))
2229 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2230 }
2231
2232 return nullptr;
2233}
2234
2236 // If the source operand is a cast from integer to FP and known exact, then
2237 // cast the integer operand directly to the destination type.
2238 Type *Ty = FPExt.getType();
2239 Value *Src = FPExt.getOperand(0);
2240 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2241 auto *FPCast = cast<CastInst>(Src);
2242 if (isKnownExactCastIntToFP(*FPCast, *this))
2243 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2244 }
2245
2246 return commonCastTransforms(FPExt);
2247}
2248
2249/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
2250/// This is safe if the intermediate type has enough bits in its mantissa to
2251/// accurately represent all values of X. For example, this won't work with
2252/// i64 -> float -> i64.
2255 return nullptr;
2256
2257 auto *OpI = cast<CastInst>(FI.getOperand(0));
2258 Value *X = OpI->getOperand(0);
2259 Type *XType = X->getType();
2260 Type *DestType = FI.getType();
2261 bool IsOutputSigned = isa<FPToSIInst>(FI);
2262
2263 // Since we can assume the conversion won't overflow, our decision as to
2264 // whether the input will fit in the float should depend on the minimum
2265 // of the input range and output range.
2266
2267 // This means this is also safe for a signed input and unsigned output, since
2268 // a negative input would lead to undefined behavior.
2269 if (!isKnownExactCastIntToFP(*OpI, *this)) {
2270 // The first cast may not round exactly based on the source integer width
2271 // and FP width, but the overflow UB rules can still allow this to fold.
2272 // If the destination type is narrow, that means the intermediate FP value
2273 // must be large enough to hold the source value exactly.
2274 // For example, (uint8_t)((float)(uint32_t 16777217) is undefined behavior.
2275 int OutputSize = (int)DestType->getScalarSizeInBits();
2276 if (OutputSize > OpI->getType()->getFPMantissaWidth())
2277 return nullptr;
2278 }
2279
2280 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
2281 bool IsInputSigned = isa<SIToFPInst>(OpI);
2282 if (IsInputSigned && IsOutputSigned)
2283 return new SExtInst(X, DestType);
2284 return new ZExtInst(X, DestType);
2285 }
2286 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
2287 return new TruncInst(X, DestType);
2288
2289 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2290 return replaceInstUsesWith(FI, X);
2291}
2292
2294 // fpto{u/s}i non-norm --> 0
2295 FPClassTest Mask =
2296 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2298 FI.getOperand(0), Mask, IC.getSimplifyQuery().getWithInstruction(&FI));
2299 if (FPClass.isKnownNever(Mask))
2301
2302 return nullptr;
2303}
2304
2306 if (Instruction *I = foldItoFPtoI(FI))
2307 return I;
2308
2309 if (Instruction *I = foldFPtoI(FI, *this))
2310 return I;
2311
2312 return commonCastTransforms(FI);
2313}
2314
2316 if (Instruction *I = foldItoFPtoI(FI))
2317 return I;
2318
2319 if (Instruction *I = foldFPtoI(FI, *this))
2320 return I;
2321
2322 return commonCastTransforms(FI);
2323}
2324
2326 if (Instruction *R = commonCastTransforms(CI))
2327 return R;
2328 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
2329 CI.setNonNeg();
2330 return &CI;
2331 }
2332 return nullptr;
2333}
2334
2336 if (Instruction *R = commonCastTransforms(CI))
2337 return R;
2338 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2339 auto *UI =
2340 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2341 UI->setNonNeg(true);
2342 return UI;
2343 }
2344 return nullptr;
2345}
2346
2348 // If the source integer type is not the intptr_t type for this target, do a
2349 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2350 // cast to be exposed to other transforms.
2351 unsigned AS = CI.getAddressSpace();
2352 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2353 DL.getPointerSizeInBits(AS)) {
2354 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2355 DL.getIntPtrType(CI.getContext(), AS));
2356 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2357 return new IntToPtrInst(P, CI.getType());
2358 }
2359
2360 // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
2361 // (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
2362 // value.
2363 Value *Base;
2364 Value *Offset;
2365 auto UsesPointerAsInt = [](User *U) {
2367 return true;
2368 if (auto *P = dyn_cast<PHINode>(U))
2369 return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(*P->user_begin());
2370 return false;
2371 };
2372 if (match(CI.getOperand(0),
2374 m_Value(Offset)))) &&
2376 Base->getType()->getPointerAddressSpace() &&
2377 all_of(CI.users(), UsesPointerAsInt)) {
2378 return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
2379 }
2380
2382 return I;
2383
2384 return nullptr;
2385}
2386
2388 // Look through chain of one-use GEPs.
2389 Type *PtrTy = Ptr->getType();
2391 while (true) {
2392 auto *GEP = dyn_cast<GEPOperator>(Ptr);
2393 if (!GEP || !GEP->hasOneUse())
2394 break;
2395 GEPs.push_back(GEP);
2396 Ptr = GEP->getPointerOperand();
2397 }
2398
2399 // Don't handle case where GEP converts from pointer to vector.
2400 if (GEPs.empty() || PtrTy != Ptr->getType())
2401 return nullptr;
2402
2403 // Check whether we know the integer value of the base pointer.
2404 Value *Res;
2405 Type *IdxTy = DL.getIndexType(PtrTy);
2406 if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
2407 Res->getType() == IntTy && IntTy == IdxTy) {
2408 // pass
2409 } else if (isa<ConstantPointerNull>(Ptr)) {
2410 Res = Constant::getNullValue(IdxTy);
2411 } else {
2412 return nullptr;
2413 }
2414
2415 // Perform the entire operation on integers instead.
2416 for (GEPOperator *GEP : reverse(GEPs)) {
2417 Value *Offset = EmitGEPOffset(GEP);
2418 Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
2419 }
2420 return Builder.CreateZExtOrTrunc(Res, IntTy);
2421}
2422
2424 // If the destination integer type is not the intptr_t type for this target,
2425 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2426 // to be exposed to other transforms.
2428 Type *SrcTy = SrcOp->getType();
2429 Type *Ty = CI.getType();
2430 unsigned AS = CI.getPointerAddressSpace();
2431 unsigned TySize = Ty->getScalarSizeInBits();
2432 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2433 if (TySize != PtrSize) {
2434 Type *IntPtrTy =
2435 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2436 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2437 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2438 }
2439
2440 // (ptrtoint (ptrmask P, M))
2441 // -> (and (ptrtoint P), M)
2442 // This is generally beneficial as `and` is better supported than `ptrmask`.
2443 Value *Ptr, *Mask;
2445 m_Value(Mask)))) &&
2446 Mask->getType() == Ty)
2447 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2448
2449 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2450 return replaceInstUsesWith(CI, V);
2451
2452 Value *Vec, *Scalar, *Index;
2454 m_Value(Scalar), m_Value(Index)))) &&
2455 Vec->getType() == Ty) {
2456 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2457 // Convert the scalar to int followed by insert to eliminate one cast:
2458 // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2459 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2460 return InsertElementInst::Create(Vec, NewCast, Index);
2461 }
2462
2463 return commonCastTransforms(CI);
2464}
2465
2468 Type *Ty = CI.getType();
2469
2470 // (ptrtoaddr (ptrmask P, M))
2471 // -> (and (ptrtoaddr P), M)
2472 // This is generally beneficial as `and` is better supported than `ptrmask`.
2473 Value *Ptr, *Mask;
2475 m_Value(Mask)))) &&
2476 Mask->getType() == Ty)
2477 return BinaryOperator::CreateAnd(Builder.CreatePtrToAddr(Ptr), Mask);
2478
2479 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2480 return replaceInstUsesWith(CI, V);
2481
2482 // FIXME: Implement variants of ptrtoint folds.
2483 return commonCastTransforms(CI);
2484}
2485
2486/// This input value (which is known to have vector type) is being zero extended
2487/// or truncated to the specified vector type. Since the zext/trunc is done
2488/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2489/// endianness will impact which end of the vector that is extended or
2490/// truncated.
2491///
2492/// A vector is always stored with index 0 at the lowest address, which
2493/// corresponds to the most significant bits for a big endian stored integer and
2494/// the least significant bits for little endian. A trunc/zext of an integer
2495/// impacts the big end of the integer. Thus, we need to add/remove elements at
2496/// the front of the vector for big endian targets, and the back of the vector
2497/// for little endian targets.
2498///
2499/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2500///
2501/// The source and destination vector types may have different element types.
2502static Instruction *
2504 InstCombinerImpl &IC) {
2505 // We can only do this optimization if the output is a multiple of the input
2506 // element size, or the input is a multiple of the output element size.
2507 // Convert the input type to have the same element type as the output.
2508 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2509
2510 if (SrcTy->getElementType() != DestTy->getElementType()) {
2511 // The input types don't need to be identical, but for now they must be the
2512 // same size. There is no specific reason we couldn't handle things like
2513 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2514 // there yet.
2515 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2516 DestTy->getElementType()->getPrimitiveSizeInBits())
2517 return nullptr;
2518
2519 SrcTy =
2520 FixedVectorType::get(DestTy->getElementType(),
2521 cast<FixedVectorType>(SrcTy)->getNumElements());
2522 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2523 }
2524
2525 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2526 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2527 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2528
2529 assert(SrcElts != DestElts && "Element counts should be different.");
2530
2531 // Now that the element types match, get the shuffle mask and RHS of the
2532 // shuffle to use, which depends on whether we're increasing or decreasing the
2533 // size of the input.
2534 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2535 ArrayRef<int> ShuffleMask;
2536 Value *V2;
2537
2538 if (SrcElts > DestElts) {
2539 // If we're shrinking the number of elements (rewriting an integer
2540 // truncate), just shuffle in the elements corresponding to the least
2541 // significant bits from the input and use poison as the second shuffle
2542 // input.
2543 V2 = PoisonValue::get(SrcTy);
2544 // Make sure the shuffle mask selects the "least significant bits" by
2545 // keeping elements from back of the src vector for big endian, and from the
2546 // front for little endian.
2547 ShuffleMask = ShuffleMaskStorage;
2548 if (IsBigEndian)
2549 ShuffleMask = ShuffleMask.take_back(DestElts);
2550 else
2551 ShuffleMask = ShuffleMask.take_front(DestElts);
2552 } else {
2553 // If we're increasing the number of elements (rewriting an integer zext),
2554 // shuffle in all of the elements from InVal. Fill the rest of the result
2555 // elements with zeros from a constant zero.
2556 V2 = Constant::getNullValue(SrcTy);
2557 // Use first elt from V2 when indicating zero in the shuffle mask.
2558 uint32_t NullElt = SrcElts;
2559 // Extend with null values in the "most significant bits" by adding elements
2560 // in front of the src vector for big endian, and at the back for little
2561 // endian.
2562 unsigned DeltaElts = DestElts - SrcElts;
2563 if (IsBigEndian)
2564 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2565 else
2566 ShuffleMaskStorage.append(DeltaElts, NullElt);
2567 ShuffleMask = ShuffleMaskStorage;
2568 }
2569
2570 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2571}
2572
2573static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2574 return Value % Ty->getPrimitiveSizeInBits() == 0;
2575}
2576
2577static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2578 return Value / Ty->getPrimitiveSizeInBits();
2579}
2580
2581/// V is a value which is inserted into a vector of VecEltTy.
2582/// Look through the value to see if we can decompose it into
2583/// insertions into the vector. See the example in the comment for
2584/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2585/// The type of V is always a non-zero multiple of VecEltTy's size.
2586/// Shift is the number of bits between the lsb of V and the lsb of
2587/// the vector.
2588///
2589/// This returns false if the pattern can't be matched or true if it can,
2590/// filling in Elements with the elements found here.
2591static bool collectInsertionElements(Value *V, unsigned Shift,
2592 SmallVectorImpl<Value *> &Elements,
2593 Type *VecEltTy, bool isBigEndian) {
2594 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2595 "Shift should be a multiple of the element type size");
2596
2597 // Undef values never contribute useful bits to the result.
2598 if (isa<UndefValue>(V)) return true;
2599
2600 // If we got down to a value of the right type, we win, try inserting into the
2601 // right element.
2602 if (V->getType() == VecEltTy) {
2603 // Inserting null doesn't actually insert any elements.
2604 if (Constant *C = dyn_cast<Constant>(V))
2605 if (C->isNullValue())
2606 return true;
2607
2608 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2609 if (isBigEndian)
2610 ElementIndex = Elements.size() - ElementIndex - 1;
2611
2612 // Fail if multiple elements are inserted into this slot.
2613 if (Elements[ElementIndex])
2614 return false;
2615
2616 Elements[ElementIndex] = V;
2617 return true;
2618 }
2619
2620 if (Constant *C = dyn_cast<Constant>(V)) {
2621 // Figure out the # elements this provides, and bitcast it or slice it up
2622 // as required.
2623 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2624 VecEltTy);
2625 // If the constant is the size of a vector element, we just need to bitcast
2626 // it to the right type so it gets properly inserted.
2627 if (NumElts == 1)
2629 Shift, Elements, VecEltTy, isBigEndian);
2630
2631 // Okay, this is a constant that covers multiple elements. Slice it up into
2632 // pieces and insert each element-sized piece into the vector.
2633 if (!isa<IntegerType>(C->getType()))
2634 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2635 C->getType()->getPrimitiveSizeInBits()));
2636 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2637 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2638
2639 for (unsigned i = 0; i != NumElts; ++i) {
2640 unsigned ShiftI = i * ElementSize;
2642 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2643 if (!Piece)
2644 return false;
2645
2646 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2647 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2648 isBigEndian))
2649 return false;
2650 }
2651 return true;
2652 }
2653
2654 if (!V->hasOneUse()) return false;
2655
2657 if (!I) return false;
2658 switch (I->getOpcode()) {
2659 default: return false; // Unhandled case.
2660 case Instruction::BitCast:
2661 if (I->getOperand(0)->getType()->isVectorTy())
2662 return false;
2663 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2664 isBigEndian);
2665 case Instruction::ZExt:
2667 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2668 VecEltTy))
2669 return false;
2670 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2671 isBigEndian);
2672 case Instruction::Or:
2673 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2674 isBigEndian) &&
2675 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2676 isBigEndian);
2677 case Instruction::Shl: {
2678 // Must be shifting by a constant that is a multiple of the element size.
2679 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2680 if (!CI) return false;
2681 Shift += CI->getZExtValue();
2682 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2683 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2684 isBigEndian);
2685 }
2686
2687 }
2688}
2689
2690
2691/// If the input is an 'or' instruction, we may be doing shifts and ors to
2692/// assemble the elements of the vector manually.
2693/// Try to rip the code out and replace it with insertelements. This is to
2694/// optimize code like this:
2695///
2696/// %tmp37 = bitcast float %inc to i32
2697/// %tmp38 = zext i32 %tmp37 to i64
2698/// %tmp31 = bitcast float %inc5 to i32
2699/// %tmp32 = zext i32 %tmp31 to i64
2700/// %tmp33 = shl i64 %tmp32, 32
2701/// %ins35 = or i64 %tmp33, %tmp38
2702/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2703///
2704/// Into two insertelements that do "buildvector{%inc, %inc5}".
2706 InstCombinerImpl &IC) {
2707 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2708 Value *IntInput = CI.getOperand(0);
2709
2710 // if the int input is just an undef value do not try to optimize to vector
2711 // insertions as it will prevent undef propagation
2712 if (isa<UndefValue>(IntInput))
2713 return nullptr;
2714
2715 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2716 if (!collectInsertionElements(IntInput, 0, Elements,
2717 DestVecTy->getElementType(),
2718 IC.getDataLayout().isBigEndian()))
2719 return nullptr;
2720
2721 // If we succeeded, we know that all of the element are specified by Elements
2722 // or are zero if Elements has a null entry. Recast this as a set of
2723 // insertions.
2724 Value *Result = Constant::getNullValue(CI.getType());
2725 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2726 if (!Elements[i]) continue; // Unset element.
2727
2728 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2729 IC.Builder.getInt32(i));
2730 }
2731
2732 return Result;
2733}
2734
2735/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2736/// vector followed by extract element. The backend tends to handle bitcasts of
2737/// vectors better than bitcasts of scalars because vector registers are
2738/// usually not type-specific like scalar integer or scalar floating-point.
2740 InstCombinerImpl &IC) {
2741 Value *VecOp, *Index;
2742 if (!match(BitCast.getOperand(0),
2743 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2744 return nullptr;
2745
2746 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2747 // type to extract from.
2748 Type *DestType = BitCast.getType();
2749 VectorType *VecType = cast<VectorType>(VecOp->getType());
2750 if (VectorType::isValidElementType(DestType)) {
2751 auto *NewVecType = VectorType::get(DestType, VecType);
2752 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2753 return ExtractElementInst::Create(NewBC, Index);
2754 }
2755
2756 // Only solve DestType is vector to avoid inverse transform in visitBitCast.
2757 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2758 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2759 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2760 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2761
2762 return nullptr;
2763}
2764
2765/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2767 InstCombiner::BuilderTy &Builder) {
2768 Type *DestTy = BitCast.getType();
2769 BinaryOperator *BO;
2770
2771 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2772 !BO->isBitwiseLogicOp())
2773 return nullptr;
2774
2775 // FIXME: This transform is restricted to vector types to avoid backend
2776 // problems caused by creating potentially illegal operations. If a fix-up is
2777 // added to handle that situation, we can remove this check.
2778 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2779 return nullptr;
2780
2781 if (DestTy->isFPOrFPVectorTy()) {
2782 Value *X, *Y;
2783 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2784 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2786 if (X->getType()->isFPOrFPVectorTy() &&
2787 Y->getType()->isIntOrIntVectorTy()) {
2788 Value *CastedOp =
2789 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2790 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2791 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2792 }
2793 if (X->getType()->isIntOrIntVectorTy() &&
2794 Y->getType()->isFPOrFPVectorTy()) {
2795 Value *CastedOp =
2796 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2797 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2798 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2799 }
2800 }
2801 return nullptr;
2802 }
2803
2804 if (!DestTy->isIntOrIntVectorTy())
2805 return nullptr;
2806
2807 Value *X;
2808 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2809 X->getType() == DestTy && !isa<Constant>(X)) {
2810 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2811 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2812 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2813 }
2814
2815 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2816 X->getType() == DestTy && !isa<Constant>(X)) {
2817 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2818 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2819 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2820 }
2821
2822 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2823 // constant. This eases recognition of special constants for later ops.
2824 // Example:
2825 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2826 Constant *C;
2827 if (match(BO->getOperand(1), m_Constant(C))) {
2828 // bitcast (logic X, C) --> logic (bitcast X, C')
2829 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2830 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2831 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2832 }
2833
2834 return nullptr;
2835}
2836
2837/// Change the type of a select if we can eliminate a bitcast.
2839 InstCombiner::BuilderTy &Builder) {
2840 Value *Cond, *TVal, *FVal;
2841 if (!match(BitCast.getOperand(0),
2842 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2843 return nullptr;
2844
2845 // A vector select must maintain the same number of elements in its operands.
2846 Type *CondTy = Cond->getType();
2847 Type *DestTy = BitCast.getType();
2848 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2849 if (!DestTy->isVectorTy() ||
2850 CondVTy->getElementCount() !=
2851 cast<VectorType>(DestTy)->getElementCount())
2852 return nullptr;
2853
2854 // FIXME: This transform is restricted from changing the select between
2855 // scalars and vectors to avoid backend problems caused by creating
2856 // potentially illegal operations. If a fix-up is added to handle that
2857 // situation, we can remove this check.
2858 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2859 return nullptr;
2860
2861 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2862 Value *X;
2863 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2864 !isa<Constant>(X)) {
2865 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2866 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2867 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2868 }
2869
2870 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2871 !isa<Constant>(X)) {
2872 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2873 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2874 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2875 }
2876
2877 return nullptr;
2878}
2879
2880/// Check if all users of CI are StoreInsts.
2881static bool hasStoreUsersOnly(CastInst &CI) {
2882 for (User *U : CI.users()) {
2883 if (!isa<StoreInst>(U))
2884 return false;
2885 }
2886 return true;
2887}
2888
2889/// This function handles following case
2890///
2891/// A -> B cast
2892/// PHI
2893/// B -> A cast
2894///
2895/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2896/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2897Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2898 PHINode *PN) {
2899 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2900 if (hasStoreUsersOnly(CI))
2901 return nullptr;
2902
2903 Value *Src = CI.getOperand(0);
2904 Type *SrcTy = Src->getType(); // Type B
2905 Type *DestTy = CI.getType(); // Type A
2906
2907 SmallVector<PHINode *, 4> PhiWorklist;
2908 SmallSetVector<PHINode *, 4> OldPhiNodes;
2909
2910 // Find all of the A->B casts and PHI nodes.
2911 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2912 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2913 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2914 PhiWorklist.push_back(PN);
2915 OldPhiNodes.insert(PN);
2916 while (!PhiWorklist.empty()) {
2917 auto *OldPN = PhiWorklist.pop_back_val();
2918 for (Value *IncValue : OldPN->incoming_values()) {
2919 if (isa<Constant>(IncValue))
2920 continue;
2921
2922 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2923 // If there is a sequence of one or more load instructions, each loaded
2924 // value is used as address of later load instruction, bitcast is
2925 // necessary to change the value type, don't optimize it. For
2926 // simplicity we give up if the load address comes from another load.
2927 Value *Addr = LI->getOperand(0);
2928 if (Addr == &CI || isa<LoadInst>(Addr))
2929 return nullptr;
2930 // Don't tranform "load <256 x i32>, <256 x i32>*" to
2931 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2932 // TODO: Remove this check when bitcast between vector and x86_amx
2933 // is replaced with a specific intrinsic.
2934 if (DestTy->isX86_AMXTy())
2935 return nullptr;
2936 if (LI->hasOneUse() && LI->isSimple())
2937 continue;
2938 // If a LoadInst has more than one use, changing the type of loaded
2939 // value may create another bitcast.
2940 return nullptr;
2941 }
2942
2943 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2944 if (OldPhiNodes.insert(PNode))
2945 PhiWorklist.push_back(PNode);
2946 continue;
2947 }
2948
2949 auto *BCI = dyn_cast<BitCastInst>(IncValue);
2950 // We can't handle other instructions.
2951 if (!BCI)
2952 return nullptr;
2953
2954 // Verify it's a A->B cast.
2955 Type *TyA = BCI->getOperand(0)->getType();
2956 Type *TyB = BCI->getType();
2957 if (TyA != DestTy || TyB != SrcTy)
2958 return nullptr;
2959 }
2960 }
2961
2962 // Check that each user of each old PHI node is something that we can
2963 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2964 for (auto *OldPN : OldPhiNodes) {
2965 for (User *V : OldPN->users()) {
2966 if (auto *SI = dyn_cast<StoreInst>(V)) {
2967 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2968 return nullptr;
2969 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2970 // Verify it's a B->A cast.
2971 Type *TyB = BCI->getOperand(0)->getType();
2972 Type *TyA = BCI->getType();
2973 if (TyA != DestTy || TyB != SrcTy)
2974 return nullptr;
2975 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2976 // As long as the user is another old PHI node, then even if we don't
2977 // rewrite it, the PHI web we're considering won't have any users
2978 // outside itself, so it'll be dead.
2979 if (!OldPhiNodes.contains(PHI))
2980 return nullptr;
2981 } else {
2982 return nullptr;
2983 }
2984 }
2985 }
2986
2987 // For each old PHI node, create a corresponding new PHI node with a type A.
2988 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
2989 for (auto *OldPN : OldPhiNodes) {
2990 Builder.SetInsertPoint(OldPN);
2991 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2992 NewPNodes[OldPN] = NewPN;
2993 }
2994
2995 // Fill in the operands of new PHI nodes.
2996 for (auto *OldPN : OldPhiNodes) {
2997 PHINode *NewPN = NewPNodes[OldPN];
2998 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2999 Value *V = OldPN->getOperand(j);
3000 Value *NewV = nullptr;
3001 if (auto *C = dyn_cast<Constant>(V)) {
3002 NewV = ConstantExpr::getBitCast(C, DestTy);
3003 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
3004 // Explicitly perform load combine to make sure no opposing transform
3005 // can remove the bitcast in the meantime and trigger an infinite loop.
3006 Builder.SetInsertPoint(LI);
3007 NewV = combineLoadToNewType(*LI, DestTy);
3008 // Remove the old load and its use in the old phi, which itself becomes
3009 // dead once the whole transform finishes.
3010 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
3012 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3013 NewV = BCI->getOperand(0);
3014 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
3015 NewV = NewPNodes[PrevPN];
3016 }
3017 assert(NewV);
3018 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
3019 }
3020 }
3021
3022 // Traverse all accumulated PHI nodes and process its users,
3023 // which are Stores and BitcCasts. Without this processing
3024 // NewPHI nodes could be replicated and could lead to extra
3025 // moves generated after DeSSA.
3026 // If there is a store with type B, change it to type A.
3027
3028
3029 // Replace users of BitCast B->A with NewPHI. These will help
3030 // later to get rid off a closure formed by OldPHI nodes.
3031 Instruction *RetVal = nullptr;
3032 for (auto *OldPN : OldPhiNodes) {
3033 PHINode *NewPN = NewPNodes[OldPN];
3034 for (User *V : make_early_inc_range(OldPN->users())) {
3035 if (auto *SI = dyn_cast<StoreInst>(V)) {
3036 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
3037 Builder.SetInsertPoint(SI);
3038 auto *NewBC =
3039 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
3040 SI->setOperand(0, NewBC);
3041 Worklist.push(SI);
3042 assert(hasStoreUsersOnly(*NewBC));
3043 }
3044 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3045 Type *TyB = BCI->getOperand(0)->getType();
3046 Type *TyA = BCI->getType();
3047 assert(TyA == DestTy && TyB == SrcTy);
3048 (void) TyA;
3049 (void) TyB;
3050 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
3051 if (BCI == &CI)
3052 RetVal = I;
3053 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3054 assert(OldPhiNodes.contains(PHI));
3055 (void) PHI;
3056 } else {
3057 llvm_unreachable("all uses should be handled");
3058 }
3059 }
3060 }
3061
3062 return RetVal;
3063}
3064
3065/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
3066/// copysign((bitcast Y to fp), X)
3068 InstCombiner::BuilderTy &Builder,
3069 const SimplifyQuery &SQ) {
3070 Value *X, *Y;
3071 Type *FTy = CI.getType();
3072 if (!FTy->isFPOrFPVectorTy())
3073 return nullptr;
3076 m_Value(Y)))))
3077 return nullptr;
3078 if (X->getType() != FTy)
3079 return nullptr;
3080 if (!isKnownNonNegative(Y, SQ))
3081 return nullptr;
3082
3083 return Builder.CreateCopySign(Builder.CreateBitCast(Y, FTy), X);
3084}
3085
3087 // If the operands are integer typed then apply the integer transforms,
3088 // otherwise just apply the common ones.
3089 Value *Src = CI.getOperand(0);
3090 Type *SrcTy = Src->getType();
3091 Type *DestTy = CI.getType();
3092
3093 // Get rid of casts from one type to the same type. These are useless and can
3094 // be replaced by the operand.
3095 if (DestTy == Src->getType())
3096 return replaceInstUsesWith(CI, Src);
3097
3098 if (isa<FixedVectorType>(DestTy)) {
3099 if (isa<IntegerType>(SrcTy)) {
3100 // If this is a cast from an integer to vector, check to see if the input
3101 // is a trunc or zext of a bitcast from vector. If so, we can replace all
3102 // the casts with a shuffle and (potentially) a bitcast.
3103 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
3104 CastInst *SrcCast = cast<CastInst>(Src);
3105 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
3106 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
3108 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
3109 return I;
3110 }
3111
3112 // If the input is an 'or' instruction, we may be doing shifts and ors to
3113 // assemble the elements of the vector manually. Try to rip the code out
3114 // and replace it with insertelements.
3115 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
3116 return replaceInstUsesWith(CI, V);
3117 }
3118 }
3119
3120 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
3121 if (SrcVTy->getNumElements() == 1) {
3122 // If our destination is not a vector, then make this a straight
3123 // scalar-scalar cast.
3124 if (!DestTy->isVectorTy()) {
3125 Value *Elem =
3126 Builder.CreateExtractElement(Src,
3128 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
3129 }
3130
3131 // Otherwise, see if our source is an insert. If so, then use the scalar
3132 // component directly:
3133 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
3134 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
3135 return new BitCastInst(InsElt->getOperand(1), DestTy);
3136 }
3137
3138 // Convert an artificial vector insert into more analyzable bitwise logic.
3139 unsigned BitWidth = DestTy->getScalarSizeInBits();
3140 Value *X, *Y;
3141 uint64_t IndexC;
3143 m_Value(Y), m_ConstantInt(IndexC)))) &&
3144 DestTy->isIntegerTy() && X->getType() == DestTy &&
3145 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
3146 // Adjust for big endian - the LSBs are at the high index.
3147 if (DL.isBigEndian())
3148 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
3149
3150 // We only handle (endian-normalized) insert to index 0. Any other insert
3151 // would require a left-shift, so that is an extra instruction.
3152 if (IndexC == 0) {
3153 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
3154 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
3155 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
3156 Value *AndX = Builder.CreateAnd(X, MaskC);
3157 Value *ZextY = Builder.CreateZExt(Y, DestTy);
3158 return BinaryOperator::CreateOr(AndX, ZextY);
3159 }
3160 }
3161 }
3162
3163 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
3164 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
3165 // a bitcast to a vector with the same # elts.
3166 Value *ShufOp0 = Shuf->getOperand(0);
3167 Value *ShufOp1 = Shuf->getOperand(1);
3168 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
3169 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
3170 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
3171 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
3172 ShufElts == SrcVecElts) {
3173 BitCastInst *Tmp;
3174 // If either of the operands is a cast from CI.getType(), then
3175 // evaluating the shuffle in the casted destination's type will allow
3176 // us to eliminate at least one cast.
3177 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
3178 Tmp->getOperand(0)->getType() == DestTy) ||
3179 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
3180 Tmp->getOperand(0)->getType() == DestTy)) {
3181 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
3182 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
3183 // Return a new shuffle vector. Use the same element ID's, as we
3184 // know the vector types match #elts.
3185 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
3186 }
3187 }
3188
3189 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
3190 // as a byte/bit swap:
3191 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
3192 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
3193 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
3194 Shuf->hasOneUse() && Shuf->isReverse()) {
3195 unsigned IntrinsicNum = 0;
3196 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
3197 SrcTy->getScalarSizeInBits() == 8) {
3198 IntrinsicNum = Intrinsic::bswap;
3199 } else if (SrcTy->getScalarSizeInBits() == 1) {
3200 IntrinsicNum = Intrinsic::bitreverse;
3201 }
3202 if (IntrinsicNum != 0) {
3203 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
3204 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
3205 Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
3206 CI.getModule(), IntrinsicNum, DestTy);
3207 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
3208 return CallInst::Create(BswapOrBitreverse, {ScalarX});
3209 }
3210 }
3211 }
3212
3213 // Handle the A->B->A cast, and there is an intervening PHI node.
3214 if (PHINode *PN = dyn_cast<PHINode>(Src))
3215 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
3216 return I;
3217
3218 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
3219 return I;
3220
3222 return I;
3223
3225 return I;
3226
3227 if (Value *V = foldCopySignIdioms(CI, Builder, SQ.getWithInstruction(&CI)))
3228 return replaceInstUsesWith(CI, V);
3229
3230 return commonCastTransforms(CI);
3231}
3232
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This file defines the DenseMap class.
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static Value * foldCopySignIdioms(BitCastInst &CI, InstCombiner::BuilderTy &Builder, const SimplifyQuery &SQ)
Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to copysign((bitcast Y to fp),...
static Type * shrinkFPConstantVector(Value *V, bool PreferBFloat)
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Instruction * foldFPtoI(Instruction &FI, InstCombiner &IC)
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(APFloat F, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
static Type * getMinimumFPType(Value *V, bool PreferBFloat)
Find the minimum FP type we can safely truncate to.
SmallDenseMap< Value *, Value *, 8 > EvaluatedMap
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static Type * shrinkFPConstant(LLVMContext &Ctx, const APFloat &F, bool PreferBFloat)
static Instruction * foldVecExtTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Whenever an element is extracted from a vector, optionally shifted down, and then truncated,...
static Value * EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned, InstCombinerImpl &IC, EvaluatedMap &Processed)
static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC)
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
Value * RHS
Value * LHS
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:304
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1573
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1112
int32_t exactLogBase2() const
Definition APInt.h:1784
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1657
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
This class represents a conversion between pointers from one address space to another.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:69
LLVM_ABI std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:244
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:248
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Type * getSrcTy() const
Return the source type, as a convenience.
Definition InstrTypes.h:615
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
static LLVM_ABI unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, const DataLayout *DL)
Determine how a pair of casts can be eliminated, if they can be at all.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static LLVM_ABI CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Type * getDestTy() const
Return the destination type, as a convenience.
Definition InstrTypes.h:617
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
const APFloat & getValueAPF() const
Definition Constants.h:320
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
bool isBigEndian() const
Definition DataLayout.h:208
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2579
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:522
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2207
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * foldItoFPtoI(CastInst &FI)
fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate ty...
Instruction * visitFPTrunc(FPTruncInst &CI)
Value * foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Instruction * visitPtrToAddr(PtrToAddrInst &CI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
BuilderTy & Builder
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool hasNonNeg() const LLVM_READONLY
Determine whether the the nneg flag is set.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents a cast from a pointer to an address (non-capturing ptrtoint).
Value * getPointerOperand()
Gets the pointer operand.
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:149
This instruction constructs a fixed permutation of two input vectors.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:246
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:270
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition Type.h:200
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:225
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:235
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:283
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:282
'undef' values are things that do not have specified contents.
Definition Constants.h:1425
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
This class represents zero extension of integer types.
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:237
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_lowbit_mask > m_LowBitMask()
Match an integer or vector with only the low bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrToIntSameSize_match< OpTy > m_PtrToIntSameSize(const DataLayout &DL, const OpTy &Op)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match a integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition MathExtras.h:350
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
LLVM_ABI Value * simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q)
Given operands for a CastInst, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2414
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ And
Bitwise or logical AND of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2108
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
Matching combinators.
SimplifyQuery getWithInstruction(const Instruction *I) const