LLVM 23.0.0git
InstructionCombining.cpp
Go to the documentation of this file.
1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APFloat.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/DenseMap.h"
42#include "llvm/ADT/Statistic.h"
47#include "llvm/Analysis/CFG.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DIBuilder.h"
67#include "llvm/IR/DataLayout.h"
68#include "llvm/IR/DebugInfo.h"
70#include "llvm/IR/Dominators.h"
72#include "llvm/IR/Function.h"
74#include "llvm/IR/IRBuilder.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/Metadata.h"
81#include "llvm/IR/Operator.h"
82#include "llvm/IR/PassManager.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/IR/ValueHandle.h"
93#include "llvm/Support/Debug.h"
102#include <algorithm>
103#include <cassert>
104#include <cstdint>
105#include <memory>
106#include <optional>
107#include <string>
108#include <utility>
109
110#define DEBUG_TYPE "instcombine"
112#include <optional>
113
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
// Counters describing how many worklist iterations were performed, overall and
// bucketed per function.
117STATISTIC(NumWorklistIterations,
118 "Number of instruction combining iterations performed");
119STATISTIC(NumOneIteration, "Number of functions with one iteration");
120STATISTIC(NumTwoIterations, "Number of functions with two iterations");
121STATISTIC(NumThreeIterations, "Number of functions with three iterations");
122STATISTIC(NumFourOrMoreIterations,
123 "Number of functions with four or more iterations");
124
// Counters for the individual kinds of transformation performed by the pass.
125STATISTIC(NumCombined , "Number of insts combined");
126STATISTIC(NumConstProp, "Number of constant folds");
127STATISTIC(NumDeadInst , "Number of dead inst eliminated");
128STATISTIC(NumSunkInst , "Number of instructions sunk");
129STATISTIC(NumExpand, "Number of expansions");
130STATISTIC(NumFactor , "Number of factorizations");
131STATISTIC(NumReassoc , "Number of reassociations");
// Debug counter registered under the name "instcombine-visit".
132DEBUG_COUNTER(VisitCounter, "instcombine-visit",
133 "Controls which instructions are visited");
134
// Command-line option "instcombine-code-sinking"; initialised to true.
135static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
136 cl::desc("Enable code sinking"),
137 cl::init(true));
138
140 "instcombine-max-sink-users", cl::init(32),
141 cl::desc("Maximum number of undroppable users for instruction sinking"));
142
144MaxArraySize("instcombine-maxarray-size", cl::init(1024),
145 cl::desc("Maximum array size considered when doing a combine"));
146
148 "instcombine-max-allocsite-removable-users", cl::Hidden, cl::init(2048),
149 cl::desc("Maximum number of users to visit in alloc-site "
150 "removability analysis"));
151
152namespace llvm {
154} // end namespace llvm
155
156// FIXME: Remove this flag when it is no longer necessary to convert
157// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
158// increases variable availability at the cost of accuracy. Variables that
159// cannot be promoted by mem2reg or SROA will be described as living in memory
160// for their entire lifetime. However, passes like DSE and instcombine can
161// delete stores to the alloca, leading to misleading and inaccurate debug
162// information. This flag can be removed when those passes are fixed.
// NOTE(review): the option is declared as cl::opt<unsigned> but initialised
// with `true` and described above as a true/false flag; presumably it is only
// ever used as a boolean -- confirm before changing the type.
163static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
164 cl::Hidden, cl::init(true));
165
166std::optional<Instruction *>
168 // Handle target specific intrinsics
169 if (II.getCalledFunction()->isTargetIntrinsic()) {
170 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(*this, II);
171 }
172 return std::nullopt;
173}
174
176 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
177 bool &KnownBitsComputed) {
178 // Handle target specific intrinsics
179 if (II.getCalledFunction()->isTargetIntrinsic()) {
180 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
181 *this, II, DemandedMask, Known, KnownBitsComputed);
182 }
183 return std::nullopt;
184}
185
187 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
188 APInt &PoisonElts2, APInt &PoisonElts3,
189 std::function<void(Instruction *, unsigned, APInt, APInt &)>
190 SimplifyAndSetOp) {
191 // Handle target specific intrinsics
192 if (II.getCalledFunction()->isTargetIntrinsic()) {
193 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
194 *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
195 SimplifyAndSetOp);
196 }
197 return std::nullopt;
198}
199
/// Forwards the query for the address-space pair (FromAS, ToAS) to
/// TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast and returns its answer.
200bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
201 // Approved exception for TTI use: This queries a legality property of the
202 // target, not a profitability heuristic. Ideally this should be part of
203 // DataLayout instead.
204 return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
205}
206
207Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
208 if (!RewriteGEP)
209 return llvm::emitGEPOffset(&Builder, DL, GEP);
210
211 IRBuilderBase::InsertPointGuard Guard(Builder);
212 auto *Inst = dyn_cast<Instruction>(GEP);
213 if (Inst)
214 Builder.SetInsertPoint(Inst);
215
216 Value *Offset = EmitGEPOffset(GEP);
217 // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
218 if (Inst && !GEP->hasAllConstantIndices() &&
219 !GEP->getSourceElementType()->isIntegerTy(8)) {
221 *Inst, Builder.CreateGEP(Builder.getInt8Ty(), GEP->getPointerOperand(),
222 Offset, "", GEP->getNoWrapFlags()));
224 }
225 return Offset;
226}
227
228Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
229 GEPNoWrapFlags NW, Type *IdxTy,
230 bool RewriteGEPs) {
231 auto Add = [&](Value *Sum, Value *Offset) -> Value * {
232 if (Sum)
233 return Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(),
234 NW.isInBounds());
235 else
236 return Offset;
237 };
238
239 Value *Sum = nullptr;
240 Value *OneUseSum = nullptr;
241 Value *OneUseBase = nullptr;
242 GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
243 for (GEPOperator *GEP : reverse(GEPs)) {
244 Value *Offset;
245 {
246 // Expand the offset at the point of the previous GEP to enable rewriting.
247 // However, use the original insertion point for calculating Sum.
248 IRBuilderBase::InsertPointGuard Guard(Builder);
249 auto *Inst = dyn_cast<Instruction>(GEP);
250 if (RewriteGEPs && Inst)
251 Builder.SetInsertPoint(Inst);
252
254 if (Offset->getType() != IdxTy)
255 Offset = Builder.CreateVectorSplat(
256 cast<VectorType>(IdxTy)->getElementCount(), Offset);
257 if (GEP->hasOneUse()) {
258 // Offsets of one-use GEPs will be merged into the next multi-use GEP.
259 OneUseSum = Add(OneUseSum, Offset);
260 OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags());
261 if (!OneUseBase)
262 OneUseBase = GEP->getPointerOperand();
263 continue;
264 }
265
266 if (OneUseSum)
267 Offset = Add(OneUseSum, Offset);
268
269 // Rewrite the GEP to reuse the computed offset. This also includes
270 // offsets from preceding one-use GEPs of matched type.
271 if (RewriteGEPs && Inst &&
272 Offset->getType()->isVectorTy() == GEP->getType()->isVectorTy() &&
273 !(GEP->getSourceElementType()->isIntegerTy(8) &&
274 GEP->getOperand(1) == Offset)) {
276 *Inst,
277 Builder.CreatePtrAdd(
278 OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "",
279 OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags())));
281 }
282 }
283
284 Sum = Add(Sum, Offset);
285 OneUseSum = OneUseBase = nullptr;
286 OneUseFlags = GEPNoWrapFlags::all();
287 }
288 if (OneUseSum)
289 Sum = Add(Sum, OneUseSum);
290 if (!Sum)
291 return Constant::getNullValue(IdxTy);
292 return Sum;
293}
294
295/// Legal integers and common types are considered desirable. This is used to
296/// avoid creating instructions with types that may not be supported well by
297/// the backend.
298/// NOTE: This treats i8, i16 and i32 specially because they are common
299/// types in frontend languages.
300bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
301 switch (BitWidth) {
302 case 8:
303 case 16:
304 case 32:
305 return true;
306 default:
 // Any other width is desirable only if the data layout reports it as legal.
307 return DL.isLegalInteger(BitWidth);
308 }
309}
310
311/// Return true if it is desirable to convert an integer computation from a
312/// given bit width to a new bit width.
313/// We don't want to convert from a legal or desirable type (like i8) to an
314/// illegal type or from a smaller to a larger illegal type. A width of '1'
315/// is always treated as a desirable type because i1 is a fundamental type in
316/// IR, and there are many specialized optimizations for i1 types.
317/// Common/desirable widths are equally treated as legal to convert to, in
318/// order to open up more combining opportunities.
319bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
320 unsigned ToWidth) const {
 // A width of 1 counts as legal regardless of what the data layout says.
321 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
322 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
323
324 // Convert to desirable widths even if they are not legal types.
325 // Only shrink types, to prevent infinite loops.
326 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
327 return true;
328
329 // If this is a legal or desirable integer from type, and the result would be
330 // an illegal type, don't do the transformation.
331 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
332 return false;
333
334 // Otherwise, if both are illegal, do not increase the size of the result. We
335 // do allow things like i160 -> i64, but not i64 -> i160.
336 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
337 return false;
338
 // None of the rejection rules above applied, so allow the conversion.
339 return true;
340}
341
342/// Return true if it is desirable to convert a computation from 'From' to 'To'.
343/// We don't want to convert from a legal to an illegal type or from a smaller
344/// to a larger illegal type. i1 is always treated as a legal type because it is
345/// a fundamental type in IR, and there are many specialized optimizations for
346/// i1 types.
/// Returns false whenever either type is not an integer type; otherwise the
/// decision is delegated to the bit-width overload of shouldChangeType.
347bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
348 // TODO: This could be extended to allow vectors. Datalayout changes might be
349 // needed to properly support that.
350 if (!From->isIntegerTy() || !To->isIntegerTy())
351 return false;
352
 // Both types are integer types here, so decide based on their bit widths.
353 unsigned FromWidth = From->getPrimitiveSizeInBits();
354 unsigned ToWidth = To->getPrimitiveSizeInBits();
355 return shouldChangeType(FromWidth, ToWidth);
356}
357
358// Return true, if No Signed Wrap should be maintained for I.
359// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
360// where both B and C should be ConstantInts, results in a constant that does
361// not overflow. This function only handles the Add/Sub/Mul opcodes. For
362// all other opcodes, the function conservatively returns false.
365 if (!OBO || !OBO->hasNoSignedWrap())
366 return false;
367
368 const APInt *BVal, *CVal;
369 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
370 return false;
371
372 // We reason about Add/Sub/Mul Only.
373 bool Overflow = false;
374 switch (I.getOpcode()) {
375 case Instruction::Add:
376 (void)BVal->sadd_ov(*CVal, Overflow);
377 break;
378 case Instruction::Sub:
379 (void)BVal->ssub_ov(*CVal, Overflow);
380 break;
381 case Instruction::Mul:
382 (void)BVal->smul_ov(*CVal, Overflow);
383 break;
384 default:
385 // Conservatively return false for other opcodes.
386 return false;
387 }
388 return !Overflow;
389}
390
393 return OBO && OBO->hasNoUnsignedWrap();
394}
395
398 return OBO && OBO->hasNoSignedWrap();
399}
400
401/// Combine constant operands of associative operations either before or after a
402/// cast to eliminate one of the associative operations:
403/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
404/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
406 InstCombinerImpl &IC) {
407 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
408 if (!Cast || !Cast->hasOneUse())
409 return false;
410
411 // TODO: Enhance logic for other casts and remove this check.
412 auto CastOpcode = Cast->getOpcode();
413 if (CastOpcode != Instruction::ZExt)
414 return false;
415
416 // TODO: Enhance logic for other BinOps and remove this check.
417 if (!BinOp1->isBitwiseLogicOp())
418 return false;
419
420 auto AssocOpcode = BinOp1->getOpcode();
421 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
422 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
423 return false;
424
425 Constant *C1, *C2;
426 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
427 !match(BinOp2->getOperand(1), m_Constant(C2)))
428 return false;
429
430 // TODO: This assumes a zext cast.
431 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
432 // to the destination type might lose bits.
433
434 // Fold the constants together in the destination type:
435 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
436 const DataLayout &DL = IC.getDataLayout();
437 Type *DestTy = C1->getType();
438 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
439 if (!CastC2)
440 return false;
441 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
442 if (!FoldedC)
443 return false;
444
445 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
446 IC.replaceOperand(*BinOp1, 1, FoldedC);
448 Cast->dropPoisonGeneratingFlags();
449 return true;
450}
451
452// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
453// inttoptr ( ptrtoint (x) ) --> x
// Returns x only if Val is an inttoptr of a ptrtoint, neither cast changes the
// bit width, and the resulting pointer is in the same address space as x.
// Returns nullptr otherwise.
454Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
455 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
 // The inttoptr must not change the bit width of the value.
456 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
457 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
458 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
459 Type *CastTy = IntToPtr->getDestTy();
 // The ptrtoint must be size-preserving as well, and the address space of
 // the final pointer must match that of the original pointer.
460 if (PtrToInt &&
461 CastTy->getPointerAddressSpace() ==
462 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
463 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
464 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
465 return PtrToInt->getOperand(0);
466 }
467 return nullptr;
468}
469
470/// This performs a few simplifications for operators that are associative or
471/// commutative:
472///
473/// Commutative operators:
474///
475/// 1. Order operands such that they are listed from right (least complex) to
476/// left (most complex). This puts constants before unary operators before
477/// binary operators.
478///
479/// Associative operators:
480///
481/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
482/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
483///
484/// Associative and commutative operators:
485///
486/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
487/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
488/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
489/// if C1 and C2 are constants.
491 Instruction::BinaryOps Opcode = I.getOpcode();
492 bool Changed = false;
493
494 do {
495 // Order operands such that they are listed from right (least complex) to
496 // left (most complex). This puts constants before unary operators before
497 // binary operators.
498 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
499 getComplexity(I.getOperand(1)))
500 Changed = !I.swapOperands();
501
502 if (I.isCommutative()) {
503 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
504 replaceOperand(I, 0, Pair->first);
505 replaceOperand(I, 1, Pair->second);
506 Changed = true;
507 }
508 }
509
510 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
511 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
512
513 if (I.isAssociative()) {
514 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
515 if (Op0 && Op0->getOpcode() == Opcode) {
516 Value *A = Op0->getOperand(0);
517 Value *B = Op0->getOperand(1);
518 Value *C = I.getOperand(1);
519
520 // Does "B op C" simplify?
521 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
522 // It simplifies to V. Form "A op V".
523 replaceOperand(I, 0, A);
524 replaceOperand(I, 1, V);
525 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
526 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
527
528 // Conservatively clear all optional flags since they may not be
529 // preserved by the reassociation. Reset nsw/nuw based on the above
530 // analysis.
531 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(&I))
532 PDI->setIsDisjoint(false);
533
534 // Note: this is only valid because SimplifyBinOp doesn't look at
535 // the operands to Op0.
537 I.setHasNoUnsignedWrap(IsNUW);
538 I.setHasNoSignedWrap(IsNSW);
539 }
540
541 Changed = true;
542 ++NumReassoc;
543 continue;
544 }
545 }
546
547 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
548 if (Op1 && Op1->getOpcode() == Opcode) {
549 Value *A = I.getOperand(0);
550 Value *B = Op1->getOperand(0);
551 Value *C = Op1->getOperand(1);
552
553 // Does "A op B" simplify?
554 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
555 // It simplifies to V. Form "V op C".
556 replaceOperand(I, 0, V);
557 replaceOperand(I, 1, C);
558 // Conservatively clear the optional flags, since they may not be
559 // preserved by the reassociation.
561 I.dropPoisonGeneratingFlags();
562 Changed = true;
563 ++NumReassoc;
564 continue;
565 }
566 }
567 }
568
569 if (I.isAssociative() && I.isCommutative()) {
570 if (simplifyAssocCastAssoc(&I, *this)) {
571 Changed = true;
572 ++NumReassoc;
573 continue;
574 }
575
576 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
577 if (Op0 && Op0->getOpcode() == Opcode) {
578 Value *A = Op0->getOperand(0);
579 Value *B = Op0->getOperand(1);
580 Value *C = I.getOperand(1);
581
582 // Does "C op A" simplify?
583 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
584 // It simplifies to V. Form "V op B".
585 replaceOperand(I, 0, V);
586 replaceOperand(I, 1, B);
587 // Conservatively clear the optional flags, since they may not be
588 // preserved by the reassociation.
590 I.dropPoisonGeneratingFlags();
591 Changed = true;
592 ++NumReassoc;
593 continue;
594 }
595 }
596
597 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
598 if (Op1 && Op1->getOpcode() == Opcode) {
599 Value *A = I.getOperand(0);
600 Value *B = Op1->getOperand(0);
601 Value *C = Op1->getOperand(1);
602
603 // Does "C op A" simplify?
604 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
605 // It simplifies to V. Form "B op V".
606 replaceOperand(I, 0, B);
607 replaceOperand(I, 1, V);
608 // Conservatively clear the optional flags, since they may not be
609 // preserved by the reassociation.
611 I.dropPoisonGeneratingFlags();
612 Changed = true;
613 ++NumReassoc;
614 continue;
615 }
616 }
617
618 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
619 // if C1 and C2 are constants.
620 Value *A, *B;
621 Constant *C1, *C2, *CRes;
622 if (Op0 && Op1 &&
623 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
624 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
625 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
626 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
627 bool IsNUW = hasNoUnsignedWrap(I) &&
628 hasNoUnsignedWrap(*Op0) &&
629 hasNoUnsignedWrap(*Op1);
630 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
631 BinaryOperator::CreateNUW(Opcode, A, B) :
632 BinaryOperator::Create(Opcode, A, B);
633
634 if (isa<FPMathOperator>(NewBO)) {
635 FastMathFlags Flags = I.getFastMathFlags() &
636 Op0->getFastMathFlags() &
637 Op1->getFastMathFlags();
638 NewBO->setFastMathFlags(Flags);
639 }
640 InsertNewInstWith(NewBO, I.getIterator());
641 NewBO->takeName(Op1);
642 replaceOperand(I, 0, NewBO);
643 replaceOperand(I, 1, CRes);
644 // Conservatively clear the optional flags, since they may not be
645 // preserved by the reassociation.
647 I.dropPoisonGeneratingFlags();
648 if (IsNUW)
649 I.setHasNoUnsignedWrap(true);
650
651 Changed = true;
652 continue;
653 }
654 }
655
656 // No further simplifications.
657 return Changed;
658 } while (true);
659}
660
661/// Return whether "X LOp (Y ROp Z)" is always equal to
662/// "(X LOp Y) ROp (X LOp Z)".
665 // X & (Y | Z) <--> (X & Y) | (X & Z)
666 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
667 if (LOp == Instruction::And)
668 return ROp == Instruction::Or || ROp == Instruction::Xor;
669
670 // X | (Y & Z) <--> (X | Y) & (X | Z)
671 if (LOp == Instruction::Or)
672 return ROp == Instruction::And;
673
674 // X * (Y + Z) <--> (X * Y) + (X * Z)
675 // X * (Y - Z) <--> (X * Y) - (X * Z)
676 if (LOp == Instruction::Mul)
677 return ROp == Instruction::Add || ROp == Instruction::Sub;
678
679 return false;
680}
681
682/// Return whether "(X LOp Y) ROp Z" is always equal to
683/// "(X ROp Z) LOp (Y ROp Z)".
687 return leftDistributesOverRight(ROp, LOp);
688
689 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
691
692 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
693 // but this requires knowing that the addition does not overflow and other
694 // such subtleties.
695}
696
697/// This function returns identity value for given opcode, which can be used to
698/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
700 if (isa<Constant>(V))
701 return nullptr;
702
703 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
704}
705
706/// This function predicates factorization using distributive laws. By default,
707/// it just returns the 'Op' inputs. But for special-cases like
708/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
709/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
710/// allow more factorization opportunities.
713 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
714 assert(Op && "Expected a binary operator");
715 LHS = Op->getOperand(0);
716 RHS = Op->getOperand(1);
717 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
718 Constant *C;
719 if (match(Op, m_Shl(m_Value(), m_ImmConstant(C)))) {
720 // X << C --> X * (1 << C)
722 Instruction::Shl, ConstantInt::get(Op->getType(), 1), C);
723 assert(RHS && "Constant folding of immediate constants failed");
724 return Instruction::Mul;
725 }
726 // TODO: We can add other conversions e.g. shr => div etc.
727 }
728 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
729 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
731 // lshr nneg C, X --> ashr nneg C, X
732 return Instruction::AShr;
733 }
734 }
735 return Op->getOpcode();
736}
737
738/// This tries to simplify binary operations by factorizing out common terms
739/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
742 Instruction::BinaryOps InnerOpcode, Value *A,
743 Value *B, Value *C, Value *D) {
744 assert(A && B && C && D && "All values must be provided");
745
746 Value *V = nullptr;
747 Value *RetVal = nullptr;
748 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
749 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
750
751 // Does "X op' Y" always equal "Y op' X"?
752 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
753
754 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
755 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
756 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
757 // commutative case, "(A op' B) op (C op' A)"?
758 if (A == C || (InnerCommutative && A == D)) {
759 if (A != C)
760 std::swap(C, D);
761 // Consider forming "A op' (B op D)".
762 // If "B op D" simplifies then it can be formed with no cost.
763 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
764
765 // If "B op D" doesn't simplify then only go on if one of the existing
766 // operations "A op' B" and "C op' D" will be zapped as no longer used.
767 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
768 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
769 if (V)
770 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
771 }
772 }
773
774 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
775 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
776 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
777 // commutative case, "(A op' B) op (B op' D)"?
778 if (B == D || (InnerCommutative && B == C)) {
779 if (B != D)
780 std::swap(C, D);
781 // Consider forming "(A op C) op' B".
782 // If "A op C" simplifies then it can be formed with no cost.
783 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
784
785 // If "A op C" doesn't simplify then only go on if one of the existing
786 // operations "A op' B" and "C op' D" will be zapped as no longer used.
787 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
788 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
789 if (V)
790 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
791 }
792 }
793
794 if (!RetVal)
795 return nullptr;
796
797 ++NumFactor;
798 RetVal->takeName(&I);
799
800 // Try to add no-overflow flags to the final value.
801 if (isa<BinaryOperator>(RetVal)) {
802 bool HasNSW = false;
803 bool HasNUW = false;
805 HasNSW = I.hasNoSignedWrap();
806 HasNUW = I.hasNoUnsignedWrap();
807 }
808 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
809 HasNSW &= LOBO->hasNoSignedWrap();
810 HasNUW &= LOBO->hasNoUnsignedWrap();
811 }
812
813 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
814 HasNSW &= ROBO->hasNoSignedWrap();
815 HasNUW &= ROBO->hasNoUnsignedWrap();
816 }
817
818 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
819 // We can propagate 'nsw' if we know that
820 // %Y = mul nsw i16 %X, C
821 // %Z = add nsw i16 %Y, %X
822 // =>
823 // %Z = mul nsw i16 %X, C+1
824 //
825 // iff C+1 isn't INT_MIN
826 const APInt *CInt;
827 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
828 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
829
830 // nuw can be propagated with any constant or nuw value.
831 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
832 }
833 }
834 return RetVal;
835}
836
837// If `I` has one Const operand and the other matches `(ctpop (not x))`,
838// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
839// This is only useful if the new subtract can fold so we only handle the
840// following cases:
841// 1) (add/sub/disjoint_or C, (ctpop (not x)))
842// -> (add/sub/disjoint_or C', (ctpop x))
843// 2) (cmp pred C, (ctpop (not x)))
844// -> (cmp pred C', (ctpop x))
846 unsigned Opc = I->getOpcode();
847 unsigned ConstIdx = 1;
848 switch (Opc) {
849 default:
850 return nullptr;
851 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
852 // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
853 // is constant.
854 case Instruction::Sub:
855 ConstIdx = 0;
856 break;
857 case Instruction::ICmp:
858 // Signed predicates aren't correct in some edge cases, e.g. for i2 types.
859 // Also, since (ctpop x) is known to be in [0, BitWidth(x)], almost all
860 // signed comparisons against it are simplified to unsigned.
861 if (cast<ICmpInst>(I)->isSigned())
862 return nullptr;
863 break;
864 case Instruction::Or:
865 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
866 return nullptr;
867 [[fallthrough]];
868 case Instruction::Add:
869 break;
870 }
871
872 Value *Op;
873 // Find ctpop.
874 if (!match(I->getOperand(1 - ConstIdx), m_OneUse(m_Ctpop(m_Value(Op)))))
875 return nullptr;
876
877 Constant *C;
878 // Check other operand is ImmConstant.
879 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
880 return nullptr;
881
882 Type *Ty = Op->getType();
883 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
884 // Need extra check for icmp. Note if this check is true, it generally means
885 // the icmp will simplify to true/false.
886 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality()) {
887 Constant *Cmp =
889 if (!Cmp || !Cmp->isNullValue())
890 return nullptr;
891 }
892
893 // Check we can invert `(not x)` for free.
894 bool Consumes = false;
895 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
896 return nullptr;
897 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
898 assert(NotOp != nullptr &&
899 "Desync between isFreeToInvert and getFreelyInverted");
900
901 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
902
903 Value *R = nullptr;
904
905 // Do the transformation here to avoid potentially introducing an infinite
906 // loop.
907 switch (Opc) {
908 case Instruction::Sub:
909 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
910 break;
911 case Instruction::Or:
912 case Instruction::Add:
913 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
914 break;
915 case Instruction::ICmp:
916 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
917 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
918 break;
919 default:
920 llvm_unreachable("Unhandled Opcode");
921 }
922 assert(R != nullptr);
923 return replaceInstUsesWith(*I, R);
924}
925
// NOTE(review): this extract is missing several lines of the function below:
// the signature (listing line 949), two lines of the second matcher
// (1013-1014) and the start of a return statement (1044). The remaining
// lines are kept exactly as extracted.
926// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
927// IFF
928// 1) the logic_shifts match
929// 2) either BinOp1 is `and`, or the binops and shift are completely
930// distributable and either BinOp2 is `and` or
931// (logic_shift (inv_logic_shift C1, C), C) == C1
932//
933// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
934//
935// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
936// IFF
937// 1) the logic_shifts match
938// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
939//
940// -> (BinOp (logic_shift (BinOp X, Y), Amt), Mask)
941//
942// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
943// IFF
944// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
945// 2) Binop2 is `xor` with an all-ones Mask, i.e. a `not`
946//
947// -> (arithmetic_shift Binop1((not X), Y), Amt)
948
950 const DataLayout &DL = I.getDataLayout();
     // Only and/or/xor/add are candidates for either binop.
951 auto IsValidBinOpc = [](unsigned Opc) {
952 switch (Opc) {
953 default:
954 return false;
955 case Instruction::And:
956 case Instruction::Or:
957 case Instruction::Xor:
958 case Instruction::Add:
959 // Skip Sub as we only match constant masks which will canonicalize to use
960 // add.
961 return true;
962 }
963 };
964
965 // Check if we can distribute the binops arbitrarily. `add` + `lshr` has
966 // extra constraints. Must not be called with `ashr` (asserted).
967 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
968 unsigned ShOpc) {
969 assert(ShOpc != Instruction::AShr);
970 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
971 ShOpc == Instruction::Shl;
972 };
973
     // Map a logical shift opcode to the opposite direction: lshr -> shl, and
     // anything else (asserted not to be ashr) -> lshr.
974 auto GetInvShift = [](unsigned ShOpc) {
975 assert(ShOpc != Instruction::AShr);
976 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
977 };
978
     // Decide whether the constant mask can be moved inside the shift for the
     // given pair of binop opcodes and shift opcode.
979 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
980 unsigned ShOpc, Constant *CMask,
981 Constant *CShift) {
982 // If the BinOp1 is `and` we don't need to check the mask.
983 if (BinOpc1 == Instruction::And)
984 return true;
985
986 // For all other possible transfers we need a completely distributable
987 // binop/shift (anything but `add` + `lshr`).
988 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
989 return false;
990
991 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
992 // vecs, otherwise the mask will be simplified and the following check will
993 // handle it).
994 if (BinOpc2 == Instruction::And)
995 return true;
996
997 // Otherwise, we need a mask that meets the requirement below, i.e. one
998 // that survives the round trip: (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
999 Constant *MaskInvShift =
1000 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1001 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
1002 CMask;
1003 };
1004
     // Try the fold with operand `ShOpnum` of I as the plain shift and the
     // other operand as the (binop (shift ...), mask) side. Returns the
     // replacement instruction, or nullptr if nothing matched.
1005 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
1006 Constant *CMask, *CShift;
1007 Value *X, *Y, *ShiftedX, *Mask, *Shift;
1008 if (!match(I.getOperand(ShOpnum),
1009 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
1010 return nullptr;
      // NOTE(review): listing lines 1013-1014 (the head of this matcher, which
      // presumably binds X) are missing from the extract.
1011 if (!match(
1012 I.getOperand(1 - ShOpnum),
1015 m_Value(ShiftedX)),
1016 m_Value(Mask)))))
1017 return nullptr;
1018 // Make sure we are matching instruction shifts and not ConstantExpr
1019 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
1020 auto *IX = dyn_cast<Instruction>(ShiftedX);
1021 if (!IY || !IX)
1022 return nullptr;
1023
1024 // LHS and RHS need same shift opcode
1025 unsigned ShOpc = IY->getOpcode();
1026 if (ShOpc != IX->getOpcode())
1027 return nullptr;
1028
1029 // Make sure binop is real instruction and not ConstantExpr
1030 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
1031 if (!BO2)
1032 return nullptr;
1033
1034 unsigned BinOpc = BO2->getOpcode();
1035 // Make sure we have valid binops.
1036 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
1037 return nullptr;
1038
      // Arithmetic shifts are only handled when the inner binop is a `not`
      // (xor with all-ones) and the outer binop is a bitwise logic op; every
      // other ashr case bails out before the helpers that assert on ashr.
1039 if (ShOpc == Instruction::AShr) {
1040 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
1041 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
1042 Value *NotX = Builder.CreateNot(X);
1043 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
      // NOTE(review): listing line 1044 (start of the return) is missing.
1045 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
1046 }
1047
1048 return nullptr;
1049 }
1050
1051 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
1052 // distribute, pulling the shift out irrespective of the constants.
1053 if (BinOpc == I.getOpcode() &&
1054 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
1055 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
1056 Value *NewBinOp1 = Builder.CreateBinOp(
1057 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
1058 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
1059 }
1060
1061 // Otherwise we can only distribute by constant shifting the mask, so
1062 // ensure we have constants.
1063 if (!match(Shift, m_ImmConstant(CShift)))
1064 return nullptr;
1065 if (!match(Mask, m_ImmConstant(CMask)))
1066 return nullptr;
1067
1068 // Check if we can distribute the binops.
1069 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
1070 return nullptr;
1071
      // Build (shift (BinOp1 Y, (BinOp2 X, inv_shift(CMask, CShift))), CShift).
1072 Constant *NewCMask =
1073 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1074 Value *NewBinOp2 = Builder.CreateBinOp(
1075 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
1076 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
1077 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
1078 NewBinOp1, CShift);
1079 };
1080
     // Try both operand orders.
1081 if (Instruction *R = MatchBinOp(0))
1082 return R;
1083 return MatchBinOp(1);
1084}
1085
1086// (Binop (zext C), (select C, T, F))
1087// -> (select C, (binop 1, T), (binop 0, F))
1088//
1089// (Binop (sext C), (select C, T, F))
1090// -> (select C, (binop -1, T), (binop 0, F))
1091//
1092// Attempt to simplify binary operations into a select with folded args, when
1093// one operand of the binop is a select instruction and the other operand is a
1094// zext/sext extension, whose value is the select condition.
// Returns the new select instruction, or nullptr if the pattern does not match.
// NOTE(review): the signature (listing lines 1095-1096) and line 1121 (the
// start of the `SI` declaration) are missing from this extract.
1097 // TODO: this simplification may be extended to any speculatable instruction,
1098 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1099 Instruction::BinaryOps Opc = I.getOpcode();
1100 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1101 Value *A, *CondVal, *TrueVal, *FalseVal;
1102 Value *CastOp;
1103 Constant *CastTrueVal, *CastFalseVal;
1104
     // Binds A/CastTrueVal/CastFalseVal from the cast-like operand and
     // CondVal/TrueVal/FalseVal from the select operand.
1105 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1106 return match(CastOp, m_SelectLike(m_Value(A), m_Constant(CastTrueVal),
1107 m_Constant(CastFalseVal))) &&
1108 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1109 m_Value(FalseVal)));
1110 };
1111
1112 // Make sure one side of the binop is a select instruction, and the other is a
1113 // zero/sign extension operating on an i1.
1114 if (MatchSelectAndCast(LHS, RHS))
1115 CastOp = LHS;
1116 else if (MatchSelectAndCast(RHS, LHS))
1117 CastOp = RHS;
1118 else
1119 return nullptr;
1120
1122 ? nullptr
1123 : cast<SelectInst>(CastOp == LHS ? RHS : LHS);
1124
     // Builds the binop of one select arm with the matching cast constant,
     // keeping the original operand order. Note the polarity: despite its
     // name, IsTrueArm == false picks CastTrueVal and IsTrueArm == true picks
     // CastFalseVal; the callers below pass the flag accordingly.
1125 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1126 bool IsCastOpRHS = (CastOp == RHS);
1127 Value *CastVal = IsTrueArm ? CastFalseVal : CastTrueVal;
1128
1129 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, CastVal)
1130 : Builder.CreateBinOp(Opc, CastVal, V);
1131 };
1132
1133 // If the value used in the zext/sext is the select condition, or the negation
1134 // of the select condition, the binop can be simplified.
1135 if (CondVal == A) {
      // Same condition: TrueVal pairs with CastTrueVal, FalseVal with
      // CastFalseVal.
1136 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1137 return SelectInst::Create(CondVal, NewTrueVal,
1138 NewFoldedConst(true, FalseVal), "", nullptr, SI);
1139 }
1140 if (match(A, m_Not(m_Specific(CondVal)))) {
      // Inverted condition: the pairing of arms and cast constants is swapped.
1141 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1142 return SelectInst::Create(CondVal, NewTrueVal,
1143 NewFoldedConst(false, FalseVal), "", nullptr, SI);
1144 }
1145
1146 return nullptr;
1147}
1148
// Tries to factor a common term out of binary operator `I`, covering the
// shapes "(A op' B) op (C op' D)", "(A op' B) op C" and "B op (C op' D)".
// Returns the value produced by tryFactorization, or nullptr if none applies.
// NOTE(review): the signature (listing line 1149) and the declarations of
// Op0/Op1 (1151-1152) are missing from this extract; presumably Op0/Op1 are
// LHS/RHS viewed as binary operators - confirm against the full source.
1150 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1153 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1154 Value *A, *B, *C, *D;
1155 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1156
     // LHSOpcode/RHSOpcode (and A..D) are only assigned when the corresponding
     // Op0/Op1 is non-null; every later use is guarded by the same check.
1157 if (Op0)
1158 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1159 if (Op1)
1160 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1161
1162 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1163 // a common term.
1164 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1165 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1166 return V;
1167
1168 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1169 // term, treating C as "C op' identity".
1170 if (Op0)
1171 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1172 if (Value *V =
1173 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1174 return V;
1175
1176 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1177 // term, treating B as "B op' identity".
1178 if (Op1)
1179 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1180 if (Value *V =
1181 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1182 return V;
1183
1184 return nullptr;
1185}
1186
1187/// This tries to simplify binary operations which some other binary operation
1188/// distributes over either by factorizing out common terms
1189/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1190/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1191/// Returns the simplified value, or null if it didn't simplify.
// NOTE(review): the signature (listing line 1192) and the declarations of
// Op0/Op1 (1194-1195) are missing from this extract.
1193 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1196 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1197
1198 // Factorization.
1199 if (Value *R = tryFactorizationFolds(I))
1200 return R;
1201
1202 // Expansion.
1203 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1204 // The instruction has the form "(A op' B) op C". See if expanding it out
1205 // to "(A op C) op' (B op C)" results in simplifications.
1206 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1207 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1208
1209 // Disable the use of undef because it's not safe to distribute undef.
1210 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1211 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1212 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1213
1214 // Do "A op C" and "B op C" both simplify?
1215 if (L && R) {
1216 // They do! Return "L op' R". (C is reused to hold the result.)
1217 ++NumExpand;
1218 C = Builder.CreateBinOp(InnerOpcode, L, R);
1219 C->takeName(&I);
1220 return C;
1221 }
1222
1223 // Does "A op C" simplify to the identity value for the inner opcode?
1224 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1225 // It does! Return "B op C".
1226 ++NumExpand;
1227 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1228 C->takeName(&I);
1229 return C;
1230 }
1231
1232 // Does "B op C" simplify to the identity value for the inner opcode?
1233 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1234 // It does! Return "A op C".
1235 ++NumExpand;
1236 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1237 C->takeName(&I);
1238 return C;
1239 }
1240 }
1241
1242 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1243 // The instruction has the form "A op (B op' C)". See if expanding it out
1244 // to "(A op B) op' (A op C)" results in simplifications.
1245 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1246 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1247
1248 // Disable the use of undef because it's not safe to distribute undef.
1249 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1250 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1251 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1252
1253 // Do "A op B" and "A op C" both simplify?
1254 if (L && R) {
1255 // They do! Return "L op' R". (A is reused to hold the result.)
1256 ++NumExpand;
1257 A = Builder.CreateBinOp(InnerOpcode, L, R);
1258 A->takeName(&I);
1259 return A;
1260 }
1261
1262 // Does "A op B" simplify to the identity value for the inner opcode?
1263 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1264 // It does! Return "A op C".
1265 ++NumExpand;
1266 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1267 A->takeName(&I);
1268 return A;
1269 }
1270
1271 // Does "A op C" simplify to the identity value for the inner opcode?
1272 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1273 // It does! Return "A op B".
1274 ++NumExpand;
1275 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1276 A->takeName(&I);
1277 return A;
1278 }
1279 }
1280
     // Neither factorization nor expansion applied; fall back to folding
     // through select operands.
1281 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1282}
1283
// Checks whether two PHI nodes form a symmetric pair: both live in the same
// block, list the same incoming blocks in the same order, and on every
// incoming edge carry the same two values as on edge 0, either in the same or
// in swapped roles. On success returns the pair of values from edge 0;
// otherwise returns std::nullopt.
// NOTE(review): the line carrying the function name and parameters (listing
// line 1285) is missing from this extract.
1284static std::optional<std::pair<Value *, Value *>>
1286 if (LHS->getParent() != RHS->getParent())
1287 return std::nullopt;
1288
     // A single incoming edge is rejected outright.
1289 if (LHS->getNumIncomingValues() < 2)
1290 return std::nullopt;
1291
1292 if (!equal(LHS->blocks(), RHS->blocks()))
1293 return std::nullopt;
1294
1295 Value *L0 = LHS->getIncomingValue(0);
1296 Value *R0 = RHS->getIncomingValue(0);
1297
     // Every other edge must carry {L0, R0}, possibly swapped between the phis.
1298 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1299 Value *L1 = LHS->getIncomingValue(I);
1300 Value *R1 = RHS->getIncomingValue(I);
1301
1302 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1303 continue;
1304
1305 return std::nullopt;
1306 }
1307
1308 return std::optional(std::pair(L0, R0));
1309}
1310
// Matches LHS and RHS when they are same-opcode instructions that together
// select the same two values; returns that pair, or std::nullopt if no
// supported pattern matches.
// NOTE(review): listing lines 1313-1314 (the LHSInst/RHSInst declarations),
// 1319 (the body of the PHI case) and 1335 (the right-hand side of the
// predicate comparison) are missing from this extract.
1311std::optional<std::pair<Value *, Value *>>
1312InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1315 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1316 return std::nullopt;
1317 switch (LHSInst->getOpcode()) {
1318 case Instruction::PHI:
1320 case Instruction::Select: {
      // Two selects on the same condition with their arms swapped.
1321 Value *Cond = LHSInst->getOperand(0);
1322 Value *TrueVal = LHSInst->getOperand(1);
1323 Value *FalseVal = LHSInst->getOperand(2);
1324 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1325 FalseVal == RHSInst->getOperand(1))
1326 return std::pair(TrueVal, FalseVal);
1327 return std::nullopt;
1328 }
1329 case Instruction::Call: {
1330 // Match min(a, b) and max(a, b)
      // The operands may appear in either order between the two calls.
1331 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1332 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
1333 if (LHSMinMax && RHSMinMax &&
1334 LHSMinMax->getPredicate() ==
1336 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1337 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1338 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1339 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1340 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1341 return std::nullopt;
1342 }
1343 default:
1344 return std::nullopt;
1345 }
1346}
1347
// Folds a binary operator whose operand(s) are selects into a single select
// of the per-arm results, when those per-arm binops simplify. Returns the new
// select value, or nullptr if no fold applies.
// NOTE(review): the first signature line (listing line 1348), line 1357 (the
// start of the `SI` declaration) and line 1362 are missing from this extract.
1349 Value *LHS,
1350 Value *RHS) {
1351 Value *A, *B, *C, *D, *E, *F;
1352 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1353 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1354 if (!LHSIsSelect && !RHSIsSelect)
1355 return nullptr;
1356
1358 ? nullptr
1359 : cast<SelectInst>(LHSIsSelect ? LHS : RHS);
1360
     // For floating-point binops, propagate I's fast-math flags to both the
     // simplification queries and the builder.
1361 FastMathFlags FMF;
1363 if (const auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
1364 FMF = FPOp->getFastMathFlags();
1365 Builder.setFastMathFlags(FMF);
1366 }
1367
1368 Instruction::BinaryOps Opcode = I.getOpcode();
1369 SimplifyQuery Q = SQ.getWithInstruction(&I);
1370
1371 Value *Cond, *True = nullptr, *False = nullptr;
1372
1373 // Special-case for add/negate combination. Replace the zero in the negation
1374 // with the trailing add operand:
1375 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1376 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
     // Reads Cond/True/False from the enclosing scope, so it must be called
     // after they have been set for the current pattern.
1377 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1378 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1379 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1380 return nullptr;
1381 Value *N;
1382 if (True && match(FVal, m_Neg(m_Value(N)))) {
1383 Value *Sub = Builder.CreateSub(Z, N);
1384 return Builder.CreateSelect(Cond, True, Sub, I.getName(), SI);
1385 }
1386 if (False && match(TVal, m_Neg(m_Value(N)))) {
1387 Value *Sub = Builder.CreateSub(Z, N);
1388 return Builder.CreateSelect(Cond, Sub, False, I.getName(), SI);
1389 }
1390 return nullptr;
1391 };
1392
1393 if (LHSIsSelect && RHSIsSelect && A == D) {
1394 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1395 Cond = A;
1396 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1397 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1398
      // If both selects die after the fold, one simplified arm is enough: the
      // other arm is materialized as a real binop.
1399 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1400 if (False && !True)
1401 True = Builder.CreateBinOp(Opcode, B, E);
1402 else if (True && !False)
1403 False = Builder.CreateBinOp(Opcode, C, F);
1404 }
1405 } else if (LHSIsSelect && LHS->hasOneUse()) {
1406 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1407 Cond = A;
1408 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1409 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1410 if (Value *NewSel = foldAddNegate(B, C, RHS))
1411 return NewSel;
1412 } else if (RHSIsSelect && RHS->hasOneUse()) {
1413 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1414 Cond = D;
1415 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1416 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1417 if (Value *NewSel = foldAddNegate(E, F, LHS))
1418 return NewSel;
1419 }
1420
     // Both arms must be available (simplified or materialized) to proceed.
1421 if (!True || !False)
1422 return nullptr;
1423
1424 Value *NewSI = Builder.CreateSelect(Cond, True, False, I.getName(), SI);
1425 NewSI->takeName(&I);
1426 return NewSI;
1427}
1428
1429/// Freely adapt every user of V as-if V was changed to !V.
1430/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
// The user equal to IgnoredUser is left untouched. Debug value records that
// refer to I are also updated by appending DW_OP_not.
// NOTE(review): the signature (listing line 1431) and lines 1444, 1451 and
// 1453 (the `BI` declaration and most of the Xor case) are missing from this
// extract.
1432 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
     // Early-inc iteration: the loop body may modify I's use list.
1433 for (User *U : make_early_inc_range(I->users())) {
1434 if (U == IgnoredUser)
1435 continue; // Don't consider this user.
1436 switch (cast<Instruction>(U)->getOpcode()) {
1437 case Instruction::Select: {
      // Inverting the condition is absorbed by swapping the select arms (and
      // the matching profile metadata).
1438 auto *SI = cast<SelectInst>(U);
1439 SI->swapValues();
1440 SI->swapProfMetadata();
1441 break;
1442 }
1443 case Instruction::CondBr: {
1445 BI->swapSuccessors(); // swaps prof metadata too
1446 if (BPI)
1447 BPI->swapSuccEdgesProbabilities(BI->getParent());
1448 break;
1449 }
1450 case Instruction::Xor:
1452 // Add to worklist for DCE.
1454 break;
1455 default:
1456 llvm_unreachable("Got unexpected user - out of sync with "
1457 "canFreelyInvertAllUsersOf() ?");
1458 }
1459 }
1460
1461 // Update pre-existing debug value uses.
1462 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1463 llvm::findDbgValues(I, DbgVariableRecords);
1464
1465 for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
      // Append DW_OP_not to each location operand that refers to I.
1466 SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
1467 for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
1468 Idx != End; ++Idx)
1469 if (DbgVal->getVariableLocationOp(Idx) == I)
1470 DbgVal->setExpression(
1471 DIExpression::appendOpsToArg(DbgVal->getExpression(), Ops, Idx));
1472 }
1473}
1474
1475/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1476/// constant zero (which is the 'negate' form).
/// Certain integer constants are also handled by returning their folded
/// negation. Returns nullptr when V is not recognized as a negated value.
// NOTE(review): listing lines 1483, 1486 and 1490 (the `if` guards that
// introduce `C` and `CV` for the three constant cases below) are missing from
// this extract.
1477Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1478 Value *NegV;
1479 if (match(V, m_Neg(m_Value(NegV))))
1480 return NegV;
1481
1482 // Constants can be considered to be negated values if they can be folded.
1484 return ConstantExpr::getNeg(C);
1485
1487 if (C->getType()->getElementType()->isIntegerTy())
1488 return ConstantExpr::getNeg(C);
1489
      // Every element must be either undef or a ConstantInt; anything else
      // (including an unavailable element) rejects the whole constant.
1491 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1492 Constant *Elt = CV->getAggregateElement(i);
1493 if (!Elt)
1494 return nullptr;
1495
1496 if (isa<UndefValue>(Elt))
1497 continue;
1498
1499 if (!isa<ConstantInt>(Elt))
1500 return nullptr;
1501 }
1502 return ConstantExpr::getNeg(CV);
1503 }
1504
1505 // Negate integer vector splats.
1506 if (auto *CV = dyn_cast<Constant>(V))
1507 if (CV->getType()->isVectorTy() &&
1508 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1509 return ConstantExpr::getNeg(CV);
1510
1511 return nullptr;
1512}
1513
1514// Try to fold:
1515// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1516// -> ({s|u}itofp (int_binop x, y))
1517// 2) (fp_binop ({s|u}itofp x), FpC)
1518// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1519//
1520// Assuming the sign of the cast for x/y is `OpsFromSigned`.
// Returns the new int-to-fp cast instruction, or nullptr if the fold cannot be
// proven exact and overflow-free.
// NOTE(review): listing lines 1523 (rest of the parameter list), 1532 (the
// initializer of MaxRepresentableBits), 1599 (start of the Op1IntC
// declaration) and 1625 (presumably the IntOpc declaration) are missing from
// this extract.
1521Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1522 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1524
1525 Type *FPTy = BO.getType();
1526 Type *IntTy = IntOps[0]->getType();
1527
1528 unsigned IntSz = IntTy->getScalarSizeInBits();
1529 // This is the maximum number of in-use bits by the integer where the int -> fp
1530 // casts are exact.
1531 unsigned MaxRepresentableBits =
1533
1534 // Preserve known number of leading bits. This can let us trivially pass the
1535 // nsw/nuw checks later on.
1536 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1537
1538 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1539 // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1540 auto IsNonZero = [&](unsigned OpNo) -> bool {
1541 if (OpsKnown[OpNo].hasKnownBits() &&
1542 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1543 return true;
1544 return isKnownNonZero(IntOps[OpNo], SQ);
1545 };
1546
1547 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1548 // NB: This matches the impl in ValueTracking, we just try to use cached
1549 // knownbits here. If we ever start supporting WithCache for
1550 // `isKnownNonNegative`, change this to an explicit call.
1551 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1552 };
1553
1554 // Check if we know for certain that ({s|u}itofp op) is exact.
1555 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1556 // Can we treat this operand as the desired sign?
      // A cast of the other signedness is only acceptable when the operand is
      // known non-negative.
1557 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1558 !IsNonNeg(OpNo))
1559 return false;
1560
1561 // If fp precision >= bitwidth(op) then it's exact.
1562 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1563 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1564 // handled specially. We can't, however, increase the bound arbitrarily for
1565 // `sitofp` as for larger sizes, it won't sign extend.
1566 if (MaxRepresentableBits < IntSz) {
1567 // Otherwise if it's a signed cast check that fp precision >= bitwidth(op) -
1568 // numSignBits(op).
1569 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1570 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1571 if (OpsFromSigned)
1572 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1573 // Finally for unsigned check that fp precision >= bitwidth(op) -
1574 // numLeadingZeros(op).
1575 else {
1576 NumUsedLeadingBits[OpNo] =
1577 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1578 }
1579 }
1580 // NB: We could also check if op is known to be a power of 2 or zero (which
1581 // will always be representable). It's unlikely, however, that if we are
1582 // unable to bound op in any way we will be able to pass the overflow checks
1583 // later on.
1584
1585 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1586 return false;
1587 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1588 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1589 IsNonZero(OpNo);
1590 };
1591
1592 // If we have a constant rhs, see if we can losslessly convert it to an int.
1593 if (Op1FpC != nullptr) {
1594 // Signed + Mul req non-zero
1595 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1596 !match(Op1FpC, m_NonZeroFP()))
1597 return nullptr;
1598
1600 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1601 IntTy, DL);
1602 if (Op1IntC == nullptr)
1603 return nullptr;
      // Lossless means converting the integer constant back to FP reproduces
      // the original FP constant.
1604 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1605 : Instruction::UIToFP,
1606 Op1IntC, FPTy, DL) != Op1FpC)
1607 return nullptr;
1608
1609 // First try to keep sign of cast the same.
1610 IntOps[1] = Op1IntC;
1611 }
1612
1613 // Ensure lhs/rhs integer types match.
1614 if (IntTy != IntOps[1]->getType())
1615 return nullptr;
1616
     // The constant operand was already validated by the round-trip check
     // above, so only a non-constant rhs needs the promotion check.
1617 if (Op1FpC == nullptr) {
1618 if (!IsValidPromotion(1))
1619 return nullptr;
1620 }
1621 if (!IsValidPromotion(0))
1622 return nullptr;
1623
1624 // Finally, we check that the integer version of the binop will not overflow.
1626 // Because of the precision check, we can often rule out overflows.
1627 bool NeedsOverflowCheck = true;
1628 // Try to conservatively rule out overflow based on the already done precision
1629 // checks.
1630 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1631 unsigned OverflowMaxCurBits =
1632 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1633 bool OutputSigned = OpsFromSigned;
     // Add/sub are bounded by the wider operand's used bits plus the headroom
     // above; mul is bounded by twice the used bits plus that headroom.
1634 switch (BO.getOpcode()) {
1635 case Instruction::FAdd:
1636 IntOpc = Instruction::Add;
1637 OverflowMaxOutputBits += OverflowMaxCurBits;
1638 break;
1639 case Instruction::FSub:
1640 IntOpc = Instruction::Sub;
1641 OverflowMaxOutputBits += OverflowMaxCurBits;
1642 break;
1643 case Instruction::FMul:
1644 IntOpc = Instruction::Mul;
1645 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1646 break;
1647 default:
1648 llvm_unreachable("Unsupported binop");
1649 }
1650 // The precision check may have already ruled out overflow.
1651 if (OverflowMaxOutputBits < IntSz) {
1652 NeedsOverflowCheck = false;
1653 // We can bound unsigned overflow from sub to in range signed value (this is
1654 // what allows us to avoid the overflow check for sub).
1655 if (IntOpc == Instruction::Sub)
1656 OutputSigned = true;
1657 }
1658
1659 // Precision check did not rule out overflow, so need to check.
1660 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1661 // `IntOps[...]` arguments to `KnownOps[...]`.
1662 if (NeedsOverflowCheck &&
1663 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1664 return nullptr;
1665
     // Build the integer binop; when it is a real instruction (not folded to
     // something else), set nsw for a signed result and nuw otherwise.
1666 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
1667 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1668 IntBO->setHasNoSignedWrap(OutputSigned);
1669 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1670 }
1671 if (OutputSigned)
1672 return new SIToFPInst(IntBinOp, FPTy);
1673 return new UIToFPInst(IntBinOp, FPTy);
1674}
1675
1676// Try to fold:
1677// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1678// -> ({s|u}itofp (int_binop x, y))
1679// 2) (fp_binop ({s|u}itofp x), FpC)
1680// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
//
// Matches the operands, then delegates to foldFBinOpOfIntCastsFromSign, trying
// the unsigned interpretation first and the signed one second. Returns the
// replacement instruction, or nullptr if neither interpretation folds.
1681Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1682 // Don't perform the fold on vectors, as the integer operation may be much
1683 // more expensive than the float operation in that case.
1684 if (BO.getType()->isVectorTy())
1685 return nullptr;
1686
1687 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1688 Constant *Op1FpC = nullptr;
1689 // Check for:
1690 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1691 // 2) (binop ({s|u}itofp x), FpC)
1692 if (!match(BO.getOperand(0), m_IToFP(m_Value(IntOps[0]))))
1693 return nullptr;
1694
     // Operand 1 is tried as a constant first; IntOps[1] is only bound when the
     // constant match fails and the int-to-fp match succeeds.
1695 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1696 !match(BO.getOperand(1), m_IToFP(m_Value(IntOps[1]))))
1697 return nullptr;
1698
1699 // Cache KnownBits so both attempts below can share the analysis results.
1700 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1701
1702 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1703 // different constraints depending on the sign of the cast.
1704 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1705 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1706 IntOps, Op1FpC, OpsKnown))
1707 return R;
1708 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1709 Op1FpC, OpsKnown);
1710}
1711
1712/// A binop with a constant operand and a sign-extended boolean operand may be
1713/// converted into a select of constants by applying the binary operation to
1714/// the constant with the two possible values of the extended boolean (0 or -1).
/// Returns the new select, or nullptr if BO does not have the form
/// `bo (sext i1 X), C` with C an immediate constant.
// NOTE(review): listing lines 1728-1729 (presumably the definitions of `Ones`
// and `Zero`) are missing from this extract.
1715Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1716 // TODO: Handle non-commutative binop (constant is operand 0).
1717 // TODO: Handle zext.
1718 // TODO: Peek through 'not' of cast.
1719 Value *BO0 = BO.getOperand(0);
1720 Value *BO1 = BO.getOperand(1);
1721 Value *X;
1722 Constant *C;
     // Only the (sext X) on the left / constant on the right order is matched,
     // and X must be an i1 or a vector of i1.
1723 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1724 !X->getType()->isIntOrIntVectorTy(1))
1725 return nullptr;
1726
1727 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1730 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1731 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
1732 return createSelectInstWithUnknownProfile(X, TVal, FVal);
1733}
1734
// Tries to simplify instruction I as if its select operand SI were replaced by
// one of SI's arms (chosen by IsTrueArm). Builds the substituted operand list
// and returns whatever simplifyInstructionWithOperands yields for it.
// NOTE(review): the first signature line (listing line 1735), the declaration
// of `Ops` (1737) and parts of the second branch's condition (1743-1744, 1746)
// are missing from this extract.
1736 bool IsTrueArm) {
1738 for (Value *Op : I.operands()) {
1739 Value *V = nullptr;
1740 if (Op == SI) {
      // The select itself becomes the chosen arm.
1741 V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
1742 } else if (match(SI->getCondition(),
1745 m_Specific(Op), m_Value(V))) &&
1747 // Pass
1748 } else if (match(Op, m_ZExt(m_Specific(SI->getCondition())))) {
      // A zext of the select condition is 1 on the true arm and 0 on the
      // false arm.
1749 V = IsTrueArm ? ConstantInt::get(Op->getType(), 1)
1750 : ConstantInt::getNullValue(Op->getType());
1751 } else {
1752 V = Op;
1753 }
1754 Ops.push_back(V);
1755 }
1756
1757 return simplifyInstructionWithOperands(&I, Ops, I.getDataLayout());
1758}
1759
// Clones instruction I, replaces the clone's uses of SI with NewOp, inserts
// the clone immediately before I and returns it.
// NOTE(review): the first signature line (listing line 1760) and line 1764 are
// missing from this extract.
1761 Value *NewOp, InstCombiner &IC) {
1762 Instruction *Clone = I.clone();
1763 Clone->replaceUsesOfWith(SI, NewOp);
1765 IC.InsertNewInstBefore(Clone, I.getIterator());
1766 return Clone;
1767}
1768
// Folds operation Op into its select operand SI, producing a new select on the
// same condition whose arms are Op applied to each arm of SI. At least one arm
// must simplify (both, when SimplifyBothArms is set); an arm that does not
// simplify is materialized as a clone of Op. Returns the new select, or
// nullptr if the fold is rejected.
// NOTE(review): the first signature line (listing line 1769) and line 1785
// (the condition guarding the min/max reduction check) are missing from this
// extract.
1770 bool FoldWithMultiUse,
1771 bool SimplifyBothArms) {
1772 // Don't modify shared select instructions unless set FoldWithMultiUse
1773 if (!SI->hasOneUser() && !FoldWithMultiUse)
1774 return nullptr;
1775
1776 Value *TV = SI->getTrueValue();
1777 Value *FV = SI->getFalseValue();
1778
1779 // Bool selects with constant operands can be folded to logical ops, so
     // i1 (and vector-of-i1) selects are not handled here.
1780 if (SI->getType()->isIntOrIntVectorTy(1))
1781 return nullptr;
1782
1783 // Avoid breaking min/max reduction pattern,
1784 // which is necessary for vectorization later.
1786 for (Value *IntrinOp : Op.operands())
1787 if (auto *PN = dyn_cast<PHINode>(IntrinOp))
1788 for (Value *PhiOp : PN->operands())
1789 if (PhiOp == &Op)
1790 return nullptr;
1791
1792 // Test if a FCmpInst instruction is used exclusively by a select as
1793 // part of a minimum or maximum operation. If so, refrain from doing
1794 // any other folding. This helps out other analyses which understand
1795 // non-obfuscated minimum and maximum idioms. And in this case, at
1796 // least one of the comparison operands has at least one user besides
1797 // the compare (the select), which would often largely negate the
1798 // benefit of folding anyway.
1799 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1800 if (CI->hasOneUse()) {
1801 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1802 if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
1803 !CI->isCommutative())
1804 return nullptr;
1805 }
1806 }
1807
1808 // Make sure that one of the select arms folds successfully.
1809 Value *NewTV = simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/true);
1810 Value *NewFV =
1811 simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/false);
1812 if (!NewTV && !NewFV)
1813 return nullptr;
1814
     // The caller may insist that both arms simplify.
1815 if (SimplifyBothArms && !(NewTV && NewFV))
1816 return nullptr;
1817
1818 // Create an instruction for the arm that did not fold.
1819 if (!NewTV)
1820 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1821 if (!NewFV)
1822 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1823 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1824}
1825
// Tries to simplify instruction I for one incoming edge of PN, substituting
// InValue for PN and phi-translating the other operands into InBB. Returns the
// simplified value, a boolean constant implied by InBB's conditional branch
// (for integer compares), or nullptr.
// NOTE(review): the first signature line (listing line 1826), the declaration
// of `Ops` (1832) and the start of the `NewVal` initializer (1844) are missing
// from this extract.
1827 Value *InValue, BasicBlock *InBB,
1828 const DataLayout &DL,
1829 const SimplifyQuery SQ) {
1830 // NB: It is a precondition of this transform that the operands be
1831 // phi translatable!
1833 for (Value *Op : I.operands()) {
1834 if (Op == PN)
1835 Ops.push_back(InValue);
1836 else
1837 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1838 }
1839
1840 // Don't consider the simplification successful if we get back a constant
1841 // expression. That's just an instruction in hiding.
1842 // Also reject the case where we simplify back to the phi node. We wouldn't
1843 // be able to remove it in that case.
1845 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1846 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1847 return NewVal;
1848
1849 // Check if incoming PHI value can be replaced with constant
1850 // based on implied condition.
     // This needs a conditional branch with two distinct successors, so that
     // taking the edge into PN's block fixes the branch condition's value.
1851 CondBrInst *TerminatorBI = dyn_cast<CondBrInst>(InBB->getTerminator());
1852 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
1853 if (TerminatorBI &&
1854 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1855 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1856 std::optional<bool> ImpliedCond = isImpliedCondition(
1857 TerminatorBI->getCondition(), ICmp->getCmpPredicate(), Ops[0], Ops[1],
1858 DL, LHSIsTrue);
1859 if (ImpliedCond)
1860 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1861 }
1862
1863 return nullptr;
1864}
1865
1866/// In some cases it is beneficial to fold a select into a binary operator.
1867/// For example:
1868/// %1 = or %in, 4
1869/// %2 = select %cond, %1, %in
1870/// %3 = or %2, 1
1871/// =>
1872/// %1 = select i1 %cond, 5, 1
1873/// %2 = or %1, %in
///
/// Requirements checked by the body: the operation is associative (asserted),
/// has one use, its operand 0 is a one-use select and its operand 1 is an
/// immediate constant; one select arm must be a one-use binop with the same
/// opcode whose operand 0 is the other arm and whose operand 1 is an
/// immediate constant. Returns the new binary operator, or nullptr when the
/// pattern does not match or constant folding fails.
1875 assert(Op.isAssociative() && "The operation must be associative!");
1876
1877 SelectInst *SI = dyn_cast<SelectInst>(Op.getOperand(0));
1878
1879 Constant *Const;
1880 if (!SI || !match(Op.getOperand(1), m_ImmConstant(Const)) ||
1881 !Op.hasOneUse() || !SI->hasOneUse())
1882 return nullptr;
1883
1884 Value *TV = SI->getTrueValue();
1885 Value *FV = SI->getFalseValue();
1886 Value *Input, *NewTV, *NewFV;
1887 Constant *Const2;
1888
// Case 1: TV == (FV op Const2). The true arm becomes the constant fold of
// (Const, Const2), the false arm becomes Const, and FV is the shared input.
1889 if (TV->hasOneUse() && match(TV, m_BinOp(Op.getOpcode(), m_Specific(FV),
1890 m_ImmConstant(Const2)))) {
1891 NewTV = ConstantFoldBinaryInstruction(Op.getOpcode(), Const, Const2);
1892 NewFV = Const;
1893 Input = FV;
// Case 2: the mirrored form, FV == (TV op Const2).
1894 } else if (FV->hasOneUse() &&
1895 match(FV, m_BinOp(Op.getOpcode(), m_Specific(TV),
1896 m_ImmConstant(Const2)))) {
1897 NewTV = Const;
1898 NewFV = ConstantFoldBinaryInstruction(Op.getOpcode(), Const, Const2);
1899 Input = TV;
1900 } else
1901 return nullptr;
1902
// Constant folding may fail and yield a null pointer; give up in that case.
1903 if (!NewTV || !NewFV)
1904 return nullptr;
1905
// NOTE(review): the last CreateSelect argument is presumably the instruction
// whose metadata is reused for the new select; it is dropped when
// ProfcheckDisableMetadataFixes is set -- confirm against the IRBuilder API.
1906 Value *NewSI =
1907 Builder.CreateSelect(SI->getCondition(), NewTV, NewFV, "",
1908 ProfcheckDisableMetadataFixes ? nullptr : SI);
1909 return BinaryOperator::Create(Op.getOpcode(), NewSI, Input);
1910}
1911
1913 bool AllowMultipleUses) {
// Folds the operation I, one of whose operands is the phi PN, into PN's
// incoming edges: every incoming value is either simplified together with I
// or receives a clone of I in its predecessor block, and a new phi of those
// results replaces I. Returns nullptr whenever one of the checks below fails.
// NOTE(review): the first line of the signature and several interior lines
// are missing from this listing; `I` is used below as an instruction
// reference and `PN` as a PHINode pointer.
1914 unsigned NumPHIValues = PN->getNumIncomingValues();
1915 if (NumPHIValues == 0)
1916 return nullptr;
1917
1918 // We normally only transform phis with a single use. However, if a PHI has
1919 // multiple uses and they are all the same operation, we can fold *all* of the
1920 // uses into the PHI.
1921 bool OneUse = PN->hasOneUse();
1922 bool IdenticalUsers = false;
// When AllowMultipleUses is set this scan is skipped, so IdenticalUsers stays
// false even for a multi-use phi.
1923 if (!AllowMultipleUses && !OneUse) {
1924 // Walk the use list for the instruction, comparing them to I.
1925 for (User *U : PN->users()) {
1927 if (UI != &I && !I.isIdenticalTo(UI))
1928 return nullptr;
1929 }
1930 // Otherwise, we can replace *all* users with the new PHI we form.
1931 IdenticalUsers = true;
1932 }
1933
1934 // Check that all operands are phi-translatable.
1935 for (Value *Op : I.operands()) {
1936 if (Op == PN)
1937 continue;
1938
1939 // Non-instructions never require phi-translation.
// Note: this local `I` shadows the function's instruction `I` for the rest of
// the loop body.
1940 auto *I = dyn_cast<Instruction>(Op);
1941 if (!I)
1942 continue;
1943
1944 // Phi-translate can handle phi nodes in the same block.
1945 if (isa<PHINode>(I))
1946 if (I->getParent() == PN->getParent())
1947 continue;
1948
1949 // Operand dominates the block, no phi-translation necessary.
1950 if (DT.dominates(I, PN->getParent()))
1951 continue;
1952
1953 // Not phi-translatable, bail out.
1954 return nullptr;
1955 }
1956
1957 // Check to see whether the instruction can be folded into each phi operand.
1958 // If there is one operand that does not fold, remember the BB it is in.
// NewPhiValues is indexed like PN's incoming list; a nullptr entry is a
// placeholder that is filled in with a clone of I further down.
1959 SmallVector<Value *> NewPhiValues;
1960 SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1961 bool SeenNonSimplifiedInVal = false;
1962 for (unsigned i = 0; i != NumPHIValues; ++i) {
1963 Value *InVal = PN->getIncomingValue(i);
1964 BasicBlock *InBB = PN->getIncomingBlock(i);
1965
1966 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1967 NewPhiValues.push_back(NewVal);
1968 continue;
1969 }
1970
1971 // Handle some cases that can't be fully simplified, but where we know that
1972 // the two instructions will fold into one.
1973 auto WillFold = [&]() {
1974 if (!InVal->hasUseList() || !InVal->hasOneUser())
1975 return false;
1976
1977 // icmp of ucmp/scmp with constant will fold to icmp.
1978 const APInt *Ignored;
1979 if (isa<CmpIntrinsic>(InVal) &&
1980 match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored))))
1981 return true;
1982
1983 // icmp eq zext(bool), 0 will fold to !bool.
1984 if (isa<ZExtInst>(InVal) &&
1985 cast<ZExtInst>(InVal)->getSrcTy()->isIntOrIntVectorTy(1) &&
1986 match(&I,
1988 return true;
1989
1990 return false;
1991 };
1992
// Incoming values accepted by WillFold get a clone of I in their predecessor
// without counting towards the "single non-simplified value" limit below.
1993 if (WillFold()) {
1994 OpsToMoveUseToIncomingBB.push_back(i);
1995 NewPhiValues.push_back(nullptr);
1996 continue;
1997 }
1998
// From here on the incoming value needs a real copy of I in InBB; that is
// only done for a single-use phi or one whose users are all identical to I.
1999 if (!OneUse && !IdenticalUsers)
2000 return nullptr;
2001
2002 if (SeenNonSimplifiedInVal)
2003 return nullptr; // More than one non-simplified value.
2004 SeenNonSimplifiedInVal = true;
2005
2006 // If there is exactly one non-simplified value, we can insert a copy of the
2007 // operation in that block. However, if this is a critical edge, we would
2008 // be inserting the computation on some other paths (e.g. inside a loop).
2009 // Only do this if the pred block is unconditionally branching into the phi
2010 // block. Also, make sure that the pred block is not dead code.
// NOTE(review): `BI` is declared on a line missing from this listing; per the
// comment above it is expected to be InBB's unconditional branch, if any.
2012 if (!BI || !DT.isReachableFromEntry(InBB))
2013 return nullptr;
2014
2015 NewPhiValues.push_back(nullptr);
2016 OpsToMoveUseToIncomingBB.push_back(i);
2017
2018 // Do not push the operation across a loop backedge. This could result in
2019 // an infinite combine loop, and is generally non-profitable (especially
2020 // if the operation was originally outside the loop).
2021 if (isBackEdge(InBB, PN->getParent()))
2022 return nullptr;
2023 }
2024
2025 // Clone the instruction that uses the phi node and move it into the incoming
2026 // BB because we know that the next iteration of InstCombine will simplify it.
// `Clones` is looked up and filled per predecessor block, so a block that
// appears several times in the phi shares one clone.
// NOTE(review): the declarations of `Clones` and of the per-iteration `Op`
// (presumably PN's incoming value at OpIndex) are on lines missing here.
2028 for (auto OpIndex : OpsToMoveUseToIncomingBB) {
2030 BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);
2031
2032 Instruction *Clone = Clones.lookup(OpBB);
2033 if (!Clone) {
2034 Clone = I.clone();
// Rewrite the clone's operands for OpBB: PN becomes Op, everything else is
// phi-translated from PN's block into OpBB.
2035 for (Use &U : Clone->operands()) {
2036 if (U == PN)
2037 U = Op;
2038 else
2039 U = U->DoPHITranslation(PN->getParent(), OpBB);
2040 }
2041 Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
2042 Clones.insert({OpBB, Clone});
2043 // We may have speculated the instruction.
2045 }
2046
2047 NewPhiValues[OpIndex] = Clone;
2048 }
2049
2050 // Okay, we can do the transformation: create the new PHI node.
2051 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
2052 InsertNewInstBefore(NewPN, PN->getIterator());
2053 NewPN->takeName(PN);
2054 NewPN->setDebugLoc(PN->getDebugLoc());
2055
2056 for (unsigned i = 0; i != NumPHIValues; ++i)
2057 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
2058
2059 if (IdenticalUsers) {
2060 // Collect and deduplicate users up-front to avoid iterator invalidation.
// NOTE(review): the declaration of `ToReplace`, the cast producing `User`,
// and one statement of the replacement loop are missing from this listing.
2062 for (User *U : PN->users()) {
2064 if (User == &I)
2065 continue;
2066 ToReplace.insert(User);
2067 }
2068 for (Instruction *I : ToReplace) {
2069 replaceInstUsesWith(*I, NewPN);
2071 }
// Every other user now refers to NewPN, so take the single-use path below.
2072 OneUse = true;
2073 }
2074
2075 if (OneUse) {
2076 replaceAllDbgUsesWith(*PN, *NewPN, *PN, DT);
2077 }
2078 return replaceInstUsesWith(I, NewPN);
2079}
2080
// Folds BO when both of its operands are binops of the same opcode that each
// form a simple recurrence with constant start and step values:
//   NewPN = phi [Init0 op Init1, ...], [NewBO, ...]
//   NewBO = NewPN op (C0 op C1)
// Returns the result of replacing BO with NewBO, or nullptr if any
// precondition fails.
// NOTE(review): the signature line and the statements between inserting the
// new instructions and the final return are missing from this listing.
2082 if (!BO.isAssociative())
2083 return nullptr;
2084
2085 // Find the interleaved binary ops.
2086 auto Opc = BO.getOpcode();
2087 auto *BO0 = dyn_cast<BinaryOperator>(BO.getOperand(0));
2088 auto *BO1 = dyn_cast<BinaryOperator>(BO.getOperand(1));
// Both operands must be associative binops with BO's opcode, exactly two uses
// each, and the same parent block.
2089 if (!BO0 || !BO1 || !BO0->hasNUses(2) || !BO1->hasNUses(2) ||
2090 BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
2091 !BO0->isAssociative() || !BO1->isAssociative() ||
2092 BO0->getParent() != BO1->getParent())
2093 return nullptr;
2094
2095 assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
2096 "Expected commutative instructions!");
2097
2098 // Find the matching phis, forming the recurrences.
2099 PHINode *PN0, *PN1;
2100 Value *Start0, *Step0, *Start1, *Step1;
2101 if (!matchSimpleRecurrence(BO0, PN0, Start0, Step0) || !PN0->hasOneUse() ||
2102 !matchSimpleRecurrence(BO1, PN1, Start1, Step1) || !PN1->hasOneUse() ||
2103 PN0->getParent() != PN1->getParent())
2104 return nullptr;
2105
2106 assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
2107 "Expected PHIs with two incoming values!");
2108
2109 // Convert the start and step values to constants.
2110 auto *Init0 = dyn_cast<Constant>(Start0);
2111 auto *Init1 = dyn_cast<Constant>(Start1);
2112 auto *C0 = dyn_cast<Constant>(Step0);
2113 auto *C1 = dyn_cast<Constant>(Step1);
2114 if (!Init0 || !Init1 || !C0 || !C1)
2115 return nullptr;
2116
2117 // Fold the recurrence constants.
2118 auto *Init = ConstantFoldBinaryInstruction(Opc, Init0, Init1);
2119 auto *C = ConstantFoldBinaryInstruction(Opc, C0, C1);
2120 if (!Init || !C)
2121 return nullptr;
2122
2123 // Create the reduced PHI.
2124 auto *NewPN = PHINode::Create(PN0->getType(), PN0->getNumIncomingValues(),
2125 "reduced.phi");
2126
2127 // Create the new binary op.
2128 auto *NewBO = BinaryOperator::Create(Opc, NewPN, C);
2129 if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
2130 // Intersect FMF flags for FADD and FMUL.
2131 FastMathFlags Intersect = BO0->getFastMathFlags() &
2132 BO1->getFastMathFlags() & BO.getFastMathFlags();
2133 NewBO->setFastMathFlags(Intersect);
2134 } else {
// Merge the flags of all three original binops and apply the result to NewBO.
// NOTE(review): the two "AllKnown..." fields are cleared up front, presumably
// so that no flag relying on operand sign/zero knowledge is kept -- confirm
// against OverflowTracking.
2135 OverflowTracking Flags;
2136 Flags.AllKnownNonNegative = false;
2137 Flags.AllKnownNonZero = false;
2138 Flags.mergeFlags(*BO0);
2139 Flags.mergeFlags(*BO1);
2140 Flags.mergeFlags(BO);
2141 Flags.applyFlags(*NewBO);
2142 }
2143 NewBO->takeName(&BO);
2144
// Mirror PN0's incoming list: the start edge receives the folded Init, the
// recurrence edge receives NewBO. The asserts check that PN1 carries the
// corresponding value from the same block.
2145 for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
2146 auto *V = PN0->getIncomingValue(I);
2147 auto *BB = PN0->getIncomingBlock(I);
2148 if (V == Init0) {
2149 assert(((PN1->getIncomingValue(0) == Init1 &&
2150 PN1->getIncomingBlock(0) == BB) ||
2151 (PN1->getIncomingValue(1) == Init1 &&
2152 PN1->getIncomingBlock(1) == BB)) &&
2153 "Invalid incoming block!");
2154 NewPN->addIncoming(Init, BB);
2155 } else if (V == BO0) {
2156 assert(((PN1->getIncomingValue(0) == BO1 &&
2157 PN1->getIncomingBlock(0) == BB) ||
2158 (PN1->getIncomingValue(1) == BO1 &&
2159 PN1->getIncomingBlock(1) == BB)) &&
2160 "Invalid incoming block!");
2161 NewPN->addIncoming(NewBO, BB);
2162 } else
2163 llvm_unreachable("Unexpected incoming value!");
2164 }
2165
2166 LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0
2167 << "\n with " << *PN1 << "\n " << *BO1
2168 << '\n');
2169
2170 // Insert the new recurrence and remove the old (dead) ones.
2171 InsertNewInstWith(NewPN, PN0->getIterator());
2172 InsertNewInstWith(NewBO, BO0->getIterator());
2173
2180
2181 return replaceInstUsesWith(BO, NewBO);
2182}
2183
// Folds a binop BO whose two operands are single-use phis located in BO's own
// block. Three strategies are tried in order: the recurrence fold, removal of
// a per-edge constant `C`, and hoisting the binop into the single predecessor
// with non-constant incoming values. Returns the replacement instruction or
// nullptr.
// NOTE(review): the signature line, the first line of the declaration of `C`,
// and the condition line of the scan over BO's block are missing from this
// listing.
2185 // Attempt to fold binary operators whose operands are simple recurrences.
2186 if (auto *NewBO = foldBinopWithRecurrence(BO))
2187 return NewBO;
2188
2189 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
2190 // we are guarding against replicating the binop in >1 predecessor.
2191 // This could miss matching a phi with 2 constant incoming values.
2192 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
2193 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
2194 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
2195 Phi0->getNumOperands() != Phi1->getNumOperands())
2196 return nullptr;
2197
2198 // TODO: Remove the restriction for binop being in the same block as the phis.
2199 if (BO.getParent() != Phi0->getParent() ||
2200 BO.getParent() != Phi1->getParent())
2201 return nullptr;
2202
2203 // Fold if there is at least one specific constant value in phi0 or phi1's
2204 // incoming values that comes from the same block and this specific constant
2205 // value can be used to do optimization for specific binary operator.
2206 // For example:
2207 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
2208 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
2209 // %add = add i32 %phi0, %phi1
2210 // ==>
2211 // %add = phi i32 [%j, %bb0], [%i, %bb1]
2213 /*AllowRHSConstant*/ false);
2214 if (C) {
2215 SmallVector<Value *, 4> NewIncomingValues;
// For one pair of same-index phi operands: both must come from the same
// block and one of them must be C; the other one is recorded as the new
// incoming value for that block.
2216 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
2217 auto &Phi0Use = std::get<0>(T);
2218 auto &Phi1Use = std::get<1>(T);
2219 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
2220 return false;
2221 Value *Phi0UseV = Phi0Use.get();
2222 Value *Phi1UseV = Phi1Use.get();
2223 if (Phi0UseV == C)
2224 NewIncomingValues.push_back(Phi1UseV);
2225 else if (Phi1UseV == C)
2226 NewIncomingValues.push_back(Phi0UseV);
2227 else
2228 return false;
2229 return true;
2230 };
2231
2232 if (all_of(zip(Phi0->operands(), Phi1->operands()),
2233 CanFoldIncomingValuePair)) {
2234 PHINode *NewPhi =
2235 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
2236 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
2237 "The number of collected incoming values should equal the number "
2238 "of the original PHINode operands!");
2239 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
2240 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
2241 return NewPhi;
2242 }
2243 }
2244
// The remaining fold only handles phis with exactly two incoming values.
2245 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
2246 return nullptr;
2247
2248 // Match a pair of incoming constants for one of the predecessor blocks.
2249 BasicBlock *ConstBB, *OtherBB;
2250 Constant *C0, *C1;
2251 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
2252 ConstBB = Phi0->getIncomingBlock(0);
2253 OtherBB = Phi0->getIncomingBlock(1);
2254 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
2255 ConstBB = Phi0->getIncomingBlock(1);
2256 OtherBB = Phi0->getIncomingBlock(0);
2257 } else {
2258 return nullptr;
2259 }
2260 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
2261 return nullptr;
2262
2263 // The block that we are hoisting to must reach here unconditionally.
2264 // Otherwise, we could be speculatively executing an expensive or
2265 // non-speculative op.
2266 auto *PredBlockBranch = dyn_cast<UncondBrInst>(OtherBB->getTerminator());
2267 if (!PredBlockBranch || !DT.isReachableFromEntry(OtherBB))
2268 return nullptr;
2269
2270 // TODO: This check could be tightened to only apply to binops (div/rem) that
2271 // are not safe to speculatively execute. But that could allow hoisting
2272 // potentially expensive instructions (fdiv for example).
// Walks every instruction from the start of BO's block up to BO itself.
2273 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
2275 return nullptr;
2276
2277 // Fold constants for the predecessor block with constant incoming values.
2278 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
2279 if (!NewC)
2280 return nullptr;
2281
2282 // Make a new binop in the predecessor block with the non-constant incoming
2283 // values.
2284 Builder.SetInsertPoint(PredBlockBranch);
2285 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
2286 Phi0->getIncomingValueForBlock(OtherBB),
2287 Phi1->getIncomingValueForBlock(OtherBB));
// The builder may return something other than a BinaryOperator; IR flags are
// only copied when a real binop instruction was created.
2288 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
2289 NotFoldedNewBO->copyIRFlags(&BO);
2290
2291 // Replace the binop with a phi of the new values. The old phis are dead.
2292 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
2293 NewPhi->addIncoming(NewBO, OtherBB);
2294 NewPhi->addIncoming(NewC, ConstBB);
2295 return NewPhi;
2296}
2297
// Tries to fold the binary operation I into one of its operands when that
// operand is a select or a phi. Operand 0 is tried first, then operand 1;
// returns the first successful fold or nullptr.
// NOTE(review): the signature line is missing from this listing.
//
// TryFoldOperand(OpIdx, IsOtherParamConst) dispatches on the kind of operand
// OpIdx. For a select it forwards `!IsOtherParamConst` as the final argument
// of FoldOpIntoSelect -- presumably the "simplify both arms" requirement, so
// a non-constant other operand demands that both arms simplify; confirm
// against FoldOpIntoSelect's signature.
2299 auto TryFoldOperand = [&](unsigned OpIdx,
2300 bool IsOtherParamConst) -> Instruction * {
2301 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(OpIdx)))
2302 return FoldOpIntoSelect(I, Sel, false, !IsOtherParamConst);
2303 if (auto *PN = dyn_cast<PHINode>(I.getOperand(OpIdx)))
2304 return foldOpIntoPhi(I, PN);
2305 return nullptr;
2306 };
2307
2308 if (Instruction *NewI =
2309 TryFoldOperand(/*OpIdx=*/0, isa<Constant>(I.getOperand(1))))
2310 return NewI;
2311 return TryFoldOperand(/*OpIdx=*/1, isa<Constant>(I.getOperand(0)));
2312}
2313
// Predicate over a GEP and its source GEP `Src`: returns false only when GEP
// has all-zero indices, Src does not, and Src does not have exactly one use;
// returns true in every other case.
// NOTE(review): the signature line is missing from this listing.
2315 // If this GEP has only 0 indices, it is the same pointer as
2316 // Src. If Src is not a trivial GEP too, don't combine
2317 // the indices.
2318 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2319 !Src.hasOneUse())
2320 return false;
2321 return true;
2322}
2323
2324/// Find a constant NewC that has property:
2325/// shuffle(NewC, ShMask) = C
2326/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
2327///
2328/// A 1-to-1 mapping is not required. Example:
2329/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
///
/// \p NewCTy is the vector type of the constant to build. For scalable types
/// only splat constants are handled; for fixed types the fold is rejected
/// when NewCTy has more elements than C. Lanes of the result that no mask
/// element refers to stay poison.
2331 VectorType *NewCTy) {
// NOTE(review): the first signature line and the statement that returns the
// scalable-vector result are missing from this listing.
2332 if (isa<ScalableVectorType>(NewCTy)) {
2333 Constant *Splat = C->getSplatValue();
2334 if (!Splat)
2335 return nullptr;
2337 }
2338
2339 if (cast<FixedVectorType>(NewCTy)->getNumElements() >
2340 cast<FixedVectorType>(C->getType())->getNumElements())
2341 return nullptr;
2342
// Start from an all-poison vector and fill in each lane the mask refers to.
2343 unsigned NewCNumElts = cast<FixedVectorType>(NewCTy)->getNumElements();
2344 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
2345 SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
2346 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
2347 for (unsigned I = 0; I < NumElts; ++I) {
2348 Constant *CElt = C->getAggregateElement(I);
// Negative mask entries are skipped.
2349 if (ShMask[I] >= 0) {
2350 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2351 Constant *NewCElt = NewVecC[ShMask[I]];
2352 // Bail out if:
2353 // 1. The constant vector contains a constant expression.
2354 // 2. The shuffle needs an element of the constant vector that can't
2355 // be mapped to a new constant vector.
2356 // 3. This is a widening shuffle that copies elements of V1 into the
2357 // extended elements (extending with poison is allowed).
2358 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2359 I >= NewCNumElts)
2360 return nullptr;
2361 NewVecC[ShMask[I]] = CElt;
2362 }
2363 }
2364 return ConstantVector::get(NewVecC);
2365}
2366
2367// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
// The result comes from ConstantFoldBinaryOpOperands and is checked for null
// by the visible caller.
// NOTE(review): the first signature line and the declaration of `LHS` are
// missing from this listing; `LHS` is presumably the splat of `Splat` built
// with the element count EC.
2369 Constant *Splat, bool SplatLHS,
2370 const DataLayout &DL) {
2371 ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
2373 Constant *RHS = Vector;
// Without SplatLHS the roles are exchanged, so Vector ends up on the left.
2374 if (!SplatLHS)
2375 std::swap(LHS, RHS);
2376 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
2377}
2378
/// Sinks a binary operator below splice intrinsics (intrinsic \p SpliceID)
/// applied to its operands, handling splice/splice, splice/splat and
/// splat/splice operand combinations. Returns the new splice call, or nullptr
/// if nothing matched.
/// NOTE(review): the signature line and the first line of each splice pattern
/// are missing from this listing.
2379template <Intrinsic::ID SpliceID>
2381 InstCombiner::BuilderTy &Builder) {
2382 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
// Builds Op(X, Y) with Inst's name and IR flags, then wraps it in a call to
// the SpliceID intrinsic with a poison second operand and the given Offset.
2383 auto CreateBinOpSplice = [&](Value *X, Value *Y, Value *Offset) {
2384 Value *V = Builder.CreateBinOp(Inst.getOpcode(), X, Y, Inst.getName());
2385 if (auto *BO = dyn_cast<BinaryOperator>(V))
2386 BO->copyIRFlags(&Inst);
2387 Module *M = Inst.getModule();
2388 Function *F = Intrinsic::getOrInsertDeclaration(M, SpliceID, V->getType());
2389 return CallInst::Create(F, {V, PoisonValue::get(V->getType()), Offset});
2390 };
2391 Value *V1, *V2, *Offset;
2392 if (match(LHS,
2394 // Op(splice(V1, poison, offset), splice(V2, poison, offset))
2395 // -> splice(Op(V1, V2), poison, offset)
2397 m_Specific(Offset))) &&
2398 (LHS->hasOneUse() || RHS->hasOneUse() ||
2399 (LHS == RHS && LHS->hasNUses(2))))
2400 return CreateBinOpSplice(V1, V2, Offset);
2401
2402 // Op(splice(V1, poison, offset), RHSSplat)
2403 // -> splice(Op(V1, RHSSplat), poison, offset)
2404 if (LHS->hasOneUse() && isSplatValue(RHS))
2405 return CreateBinOpSplice(V1, RHS, Offset);
2406 }
2407 // Op(LHSSplat, splice(V2, poison, offset))
2408 // -> splice(Op(LHSSplat, V2), poison, offset)
2409 else if (isSplatValue(LHS) &&
2411 m_Value(Offset)))))
2412 return CreateBinOpSplice(LHS, V2, Offset);
2413
2414 // TODO: Fold binops of the form
2415 // Op(splice(poison, V1, offset), splice(poison, V2, offset))
2416 // -> splice(poison, Op(V1, V2), offset)
2417
2418 return nullptr;
2419}
2420
// Vector-specific folds for the binary operator Inst. The folds are tried in
// sequence and the first match returns its replacement; nullptr is returned
// when Inst is not a vector operation or nothing matched.
// NOTE(review): the signature line and several statement-opening lines
// (mostly the first line of multi-line match(...) calls) are missing from
// this listing; the comments below describe only what is visible.
2422 if (!isa<VectorType>(Inst.getType()))
2423 return nullptr;
2424
2425 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2426 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
2427 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2428 cast<VectorType>(Inst.getType())->getElementCount());
2429 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2430 cast<VectorType>(Inst.getType())->getElementCount());
2431
// Helper: when MaybeSplat is a constant splat and MaybeSubVector is a
// vector.insert of two constants, apply the op to the destination and the
// subvector separately and rebuild the insert. SplatLHS tells which side of
// the op the splat is on.
2432 auto foldConstantsThroughSubVectorInsertSplat =
2433 [&](Value *MaybeSubVector, Value *MaybeSplat,
2434 bool SplatLHS) -> Instruction * {
2435 Value *Idx;
2436 Constant *Splat, *SubVector, *Dest;
2437 if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) ||
2438 !match(MaybeSubVector,
2439 m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
2440 m_Value(Idx))))
2441 return nullptr;
2442 SubVector =
2443 constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
2444 Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
2445 if (!SubVector || !Dest)
2446 return nullptr;
2447 auto *InsertVector =
2448 Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
2449 return replaceInstUsesWith(Inst, InsertVector);
2450 };
2451
2452 // If one operand is a constant splat and the other operand is a
2453 // `vector.insert` where both the destination and subvector are constant,
2454 // apply the operation to both the destination and subvector, returning a new
2455 // constant `vector.insert`. This helps constant folding for scalable vectors.
2456 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2457 /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
2458 return Folded;
2459 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2460 /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
2461 return Folded;
2462
2463 // If both operands of the binop are vector concatenations, then perform the
2464 // narrow binop on each pair of the source operands followed by concatenation
2465 // of the results.
2466 Value *L0, *L1, *R0, *R1;
2467 ArrayRef<int> Mask;
2468 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2469 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2470 LHS->hasOneUse() && RHS->hasOneUse() &&
2471 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2472 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2473 // This transform does not have the speculative execution constraint as
2474 // below because the shuffle is a concatenation. The new binops are
2475 // operating on exactly the same elements as the existing binop.
2476 // TODO: We could ease the mask requirement to allow different undef lanes,
2477 // but that requires an analysis of the binop-with-undef output value.
2478 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2479 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2480 BO->copyIRFlags(&Inst);
2481 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2482 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2483 BO->copyIRFlags(&Inst);
2484 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2485 }
2486
// Helper: build Op(X, Y) with Inst's name and IR flags and wrap it in a call
// to the vector_reverse intrinsic.
2487 auto createBinOpReverse = [&](Value *X, Value *Y) {
2488 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2489 if (auto *BO = dyn_cast<BinaryOperator>(V))
2490 BO->copyIRFlags(&Inst);
2491 Module *M = Inst.getModule();
2493 M, Intrinsic::vector_reverse, V->getType());
2494 return CallInst::Create(F, V);
2495 };
2496
2497 // NOTE: Reverse shuffles don't require the speculative execution protection
2498 // below because they don't affect which lanes take part in the computation.
2499
2500 Value *V1, *V2;
2501 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2502 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2503 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2504 (LHS->hasOneUse() || RHS->hasOneUse() ||
2505 (LHS == RHS && LHS->hasNUses(2))))
2506 return createBinOpReverse(V1, V2);
2507
2508 // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
2509 if (LHS->hasOneUse() && isSplatValue(RHS))
2510 return createBinOpReverse(V1, RHS);
2511 }
2512 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2513 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2514 return createBinOpReverse(LHS, V2);
2515
// Helper: same as createBinOpReverse, but for the experimental_vp_reverse
// intrinsic, called with an all-true mask and the given EVL.
2516 auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
2517 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2518 if (auto *BO = dyn_cast<BinaryOperator>(V))
2519 BO->copyIRFlags(&Inst);
2520
2521 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
2522 Value *AllTrueMask = Builder.CreateVectorSplat(EC, Builder.getTrue());
2523 Module *M = Inst.getModule();
2525 M, Intrinsic::experimental_vp_reverse, V->getType());
2526 return CallInst::Create(F, {V, AllTrueMask, EVL});
2527 };
2528
2529 Value *EVL;
2531 m_Value(V1), m_AllOnes(), m_Value(EVL)))) {
2532 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2534 m_Value(V2), m_AllOnes(), m_Specific(EVL))) &&
2535 (LHS->hasOneUse() || RHS->hasOneUse() ||
2536 (LHS == RHS && LHS->hasNUses(2))))
2537 return createBinOpVPReverse(V1, V2, EVL);
2538
2539 // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
2540 if (LHS->hasOneUse() && isSplatValue(RHS))
2541 return createBinOpVPReverse(V1, RHS, EVL);
2542 }
2543 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2544 else if (isSplatValue(LHS) &&
2546 m_Value(V2), m_AllOnes(), m_Value(EVL))))
2547 return createBinOpVPReverse(LHS, V2, EVL);
2548
// NOTE(review): the initializers of the two `Folded` checks below are on
// lines missing from this listing.
2549 if (Instruction *Folded =
2551 return Folded;
2552 if (Instruction *Folded =
2554 return Folded;
2555
2556 // It may not be safe to reorder shuffles and things like div, urem, etc.
2557 // because we may trap when executing those ops on unknown vector elements.
2558 // See PR20059.
// NOTE(review): the condition guarding this early return is on a line missing
// from this listing.
2560 return nullptr;
2561
// Helper: build Op(X, Y) with Inst's IR flags and apply a single-source
// shuffle with mask M to the result.
2562 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2563 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2564 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2565 BO->copyIRFlags(&Inst);
2566 return new ShuffleVectorInst(XY, M);
2567 };
2568
2569 // If both arguments of the binary operation are shuffles that use the same
2570 // mask and shuffle within a single vector, move the shuffle after the binop.
2571 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2572 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2573 V1->getType() == V2->getType() &&
2574 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2575 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2576 return createBinOpShuffle(V1, V2, Mask);
2577 }
2578
2579 // If both arguments of a commutative binop are select-shuffles that use the
2580 // same mask with commuted operands, the shuffles are unnecessary.
2581 if (Inst.isCommutative() &&
2582 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2583 match(RHS,
2584 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
2585 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2586 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2587 // TODO: Allow shuffles that contain undefs in the mask?
2588 // That is legal, but it reduces undef knowledge.
2589 // TODO: Allow arbitrary shuffles by shuffling after binop?
2590 // That might be legal, but we have to deal with poison.
2591 if (LShuf->isSelect() &&
2592 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2593 RShuf->isSelect() &&
2594 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2595 // Example:
2596 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2597 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2598 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2599 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2600 NewBO->copyIRFlags(&Inst);
2601 return NewBO;
2602 }
2603 }
2604
2605 // If one argument is a shuffle within one vector and the other is a constant,
2606 // try moving the shuffle after the binary operation. This canonicalization
2607 // intends to move shuffles closer to other shuffles and binops closer to
2608 // other binops, so they can be folded. It may also enable demanded elements
2609 // transforms.
2610 Constant *C;
2612 m_Mask(Mask))),
2613 m_ImmConstant(C)))) {
2614 assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
2615 "Shuffle should not change scalar type");
2616
// ConstOp1 records that the constant is the right-hand operand, which decides
// the operand order of the rebuilt binop below.
2617 bool ConstOp1 = isa<Constant>(RHS);
2618 if (Constant *NewC =
2620 // For fixed vectors, lanes of NewC not used by the shuffle will be poison
2621 // which will cause UB for div/rem. Mask them with a safe constant.
2622 if (isa<FixedVectorType>(V1->getType()) && Inst.isIntDivRem())
2623 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2624
2625 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2626 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2627 Value *NewLHS = ConstOp1 ? V1 : NewC;
2628 Value *NewRHS = ConstOp1 ? NewC : V1;
2629 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2630 }
2631 }
2632
2633 // Try to reassociate to sink a splat shuffle after a binary operation.
2634 if (Inst.isAssociative() && Inst.isCommutative()) {
2635 // Canonicalize shuffle operand as LHS.
2636 if (isa<ShuffleVectorInst>(RHS))
2637 std::swap(LHS, RHS);
2638
2639 Value *X;
2640 ArrayRef<int> MaskC;
2641 int SplatIndex;
2642 Value *Y, *OtherOp;
// Required shape: LHS is a one-use splat-or-poison shuffle of X (X having
// Inst's type) and RHS is a one-use binop with the same opcode.
2643 if (!match(LHS,
2644 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2645 !match(MaskC, m_SplatOrPoisonMask(SplatIndex)) ||
2646 X->getType() != Inst.getType() ||
2647 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2648 return nullptr;
2649
2650 // FIXME: This may not be safe if the analysis allows undef elements. By
2651 // moving 'Y' before the splat shuffle, we are implicitly assuming
2652 // that it is not undef/poison at the splat index.
// After this step Y names the operand of RHS that is a splat at SplatIndex.
2653 if (isSplatValue(OtherOp, SplatIndex)) {
2654 std::swap(Y, OtherOp);
2655 } else if (!isSplatValue(Y, SplatIndex)) {
2656 return nullptr;
2657 }
2658
2659 // X and Y are splatted values, so perform the binary operation on those
2660 // values followed by a splat followed by the 2nd binary operation:
2661 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2662 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2663 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2664 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2665 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2666
2667 // Intersect FMF on both new binops. Other (poison-generating) flags are
2668 // dropped to be safe.
2669 if (isa<FPMathOperator>(R)) {
2670 R->copyFastMathFlags(&Inst);
2671 R->andIRFlags(RHS);
2672 }
2673 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2674 NewInstBO->copyIRFlags(R);
2675 return R;
2676 }
2677
2678 return nullptr;
2679}
2680
2681/// Try to narrow the width of a binop if at least 1 operand is an extend of
2682/// of a value. This requires a potentially expensive known bits check to make
2683/// sure the narrow op does not overflow.
2684Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2685 // We need at least one extended operand.
2686 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2687
2688 // If this is a sub, we swap the operands since we always want an extension
2689 // on the RHS. The LHS can be an extension or a constant.
2690 if (BO.getOpcode() == Instruction::Sub)
2691 std::swap(Op0, Op1);
2692
2693 Value *X;
2694 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2695 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2696 return nullptr;
2697
2698 // If both operands are the same extension from the same source type and we
2699 // can eliminate at least one (hasOneUse), this might work.
2700 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2701 Value *Y;
2702 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2703 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2704 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2705 // If that did not match, see if we have a suitable constant operand.
2706 // Truncating and extending must produce the same constant.
2707 Constant *WideC;
2708 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2709 return nullptr;
2710 Constant *NarrowC = getLosslessInvCast(WideC, X->getType(), CastOpc, DL);
2711 if (!NarrowC)
2712 return nullptr;
2713 Y = NarrowC;
2714 }
2715
2716 // Swap back now that we found our operands.
2717 if (BO.getOpcode() == Instruction::Sub)
2718 std::swap(X, Y);
2719
2720 // Both operands have narrow versions. Last step: the math must not overflow
2721 // in the narrow width.
2722 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2723 return nullptr;
2724
2725 // bo (ext X), (ext Y) --> ext (bo X, Y)
2726 // bo (ext X), C --> ext (bo X, C')
2727 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2728 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2729 if (IsSext)
2730 NewBinOp->setHasNoSignedWrap();
2731 else
2732 NewBinOp->setHasNoUnsignedWrap();
2733 }
2734 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2735}
2736
2737/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2738/// transform.
2743
2744/// Thread a GEP operation with constant indices through the constant true/false
2745/// arms of a select.
///
/// Applies only when every index of the GEP is constant and the GEP's pointer
/// operand is a select instruction whose true and false values are both
/// constants. In that case a new select over two freshly built GEPs (one per
/// arm) is returned; otherwise the result is nullptr.
2747 InstCombiner::BuilderTy &Builder) {
2748 if (!GEP.hasAllConstantIndices())
2749 return nullptr;
2750
// The pointer operand must be an instruction, and that instruction must be a
// select with constant arms; Cond, TrueC and FalseC are bound by the match.
2751 Instruction *Sel;
2752 Value *Cond;
2753 Constant *TrueC, *FalseC;
2754 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2755 !match(Sel,
2756 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2757 return nullptr;
2758
2759 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2760 // Propagate the GEP's no-wrap flags (e.g. 'inbounds') and metadata from the
// existing instructions.
2761 // Note: using IRBuilder to create the constants for efficiency.
2762 SmallVector<Value *, 4> IndexC(GEP.indices());
2763 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2764 Type *Ty = GEP.getSourceElementType();
// Both arms get the same source element type, indices and no-wrap flags as
// the original GEP; only the base pointer differs.
2765 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", NW);
2766 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", NW);
// The original select is passed as the last argument, presumably so its
// metadata is carried over to the new select (see the comment above).
2767 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2768}
2769
2770// Canonicalization:
2771// gep T, (gep i8, base, C1), (Index + C2) into
2772// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
2774 GEPOperator *Src,
2775 InstCombinerImpl &IC) {
2776 if (GEP.getNumIndices() != 1)
2777 return nullptr;
2778 auto &DL = IC.getDataLayout();
2779 Value *Base;
2780 const APInt *C1;
2781 if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1))))
2782 return nullptr;
2783 Value *VarIndex;
2784 const APInt *C2;
2785 Type *PtrTy = Src->getType()->getScalarType();
2786 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
2787 if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2))))
2788 return nullptr;
2789 if (C1->getBitWidth() != IndexSizeInBits ||
2790 C2->getBitWidth() != IndexSizeInBits)
2791 return nullptr;
2792 Type *BaseType = GEP.getSourceElementType();
2794 return nullptr;
2795 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2796 APInt NewOffset = TypeSize * *C2 + *C1;
2797 if (NewOffset.isZero() ||
2798 (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
2800 if (GEP.hasNoUnsignedWrap() &&
2801 cast<GEPOperator>(Src)->hasNoUnsignedWrap() &&
2802 match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()))) {
2804 if (GEP.isInBounds() && cast<GEPOperator>(Src)->isInBounds())
2805 Flags |= GEPNoWrapFlags::inBounds();
2806 }
2807
2808 Value *GEPConst =
2809 IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset), "", Flags);
2810 return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex, Flags);
2811 }
2812
2813 return nullptr;
2814}
2815
2816/// Combine constant offsets separated by variable offsets.
2817/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
2819 InstCombinerImpl &IC) {
2820 if (!GEP.hasAllConstantIndices())
2821 return nullptr;
2822
2825 auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
2826 while (true) {
2827 if (!InnerGEP)
2828 return nullptr;
2829
2830 NW = NW.intersectForReassociate(InnerGEP->getNoWrapFlags());
2831 if (InnerGEP->hasAllConstantIndices())
2832 break;
2833
2834 if (!InnerGEP->hasOneUse())
2835 return nullptr;
2836
2837 Skipped.push_back(InnerGEP);
2838 InnerGEP = dyn_cast<GetElementPtrInst>(InnerGEP->getPointerOperand());
2839 }
2840
2841 // The two constant offset GEPs are directly adjacent: Let normal offset
2842 // merging handle it.
2843 if (Skipped.empty())
2844 return nullptr;
2845
2846 // FIXME: This one-use check is not strictly necessary. Consider relaxing it
2847 // if profitable.
2848 if (!InnerGEP->hasOneUse())
2849 return nullptr;
2850
2851 // Don't bother with vector splats.
2852 Type *Ty = GEP.getType();
2853 if (InnerGEP->getType() != Ty)
2854 return nullptr;
2855
2856 const DataLayout &DL = IC.getDataLayout();
2857 APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2858 if (!GEP.accumulateConstantOffset(DL, Offset) ||
2859 !InnerGEP->accumulateConstantOffset(DL, Offset))
2860 return nullptr;
2861
2862 IC.replaceOperand(*Skipped.back(), 0, InnerGEP->getPointerOperand());
2863 for (GetElementPtrInst *SkippedGEP : Skipped)
2864 SkippedGEP->setNoWrapFlags(NW);
2865
2866 return IC.replaceInstUsesWith(
2867 GEP,
2868 IC.Builder.CreatePtrAdd(Skipped.front(), IC.Builder.getInt(Offset), "",
2869 NW.intersectForOffsetAdd(GEP.getNoWrapFlags())));
2870}
2871
2873 GEPOperator *Src) {
2874 // Combine Indices - If the source pointer to this getelementptr instruction
2875 // is a getelementptr instruction with matching element type, combine the
2876 // indices of the two getelementptr instructions into a single instruction.
2877 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2878 return nullptr;
2879
2880 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2881 return I;
2882
2883 if (auto *I = combineConstantOffsets(GEP, *this))
2884 return I;
2885
2886 if (Src->getResultElementType() != GEP.getSourceElementType())
2887 return nullptr;
2888
2889 // Fold chained GEP with constant base into single GEP:
2890 // gep i8, (gep i8, %base, C1), (select Cond, C2, C3)
2891 // -> gep i8, %base, (select Cond, C1+C2, C1+C3)
2892 if (Src->hasOneUse() && GEP.getNumIndices() == 1 &&
2893 Src->getNumIndices() == 1) {
2894 Value *SrcIdx = *Src->idx_begin();
2895 Value *GEPIdx = *GEP.idx_begin();
2896 const APInt *ConstOffset, *TrueVal, *FalseVal;
2897 Value *Cond;
2898
2899 if ((match(SrcIdx, m_APInt(ConstOffset)) &&
2900 match(GEPIdx,
2901 m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal)))) ||
2902 (match(GEPIdx, m_APInt(ConstOffset)) &&
2903 match(SrcIdx,
2904 m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal))))) {
2905 auto *Select = isa<SelectInst>(GEPIdx) ? cast<SelectInst>(GEPIdx)
2906 : cast<SelectInst>(SrcIdx);
2907
2908 // Make sure the select has only one use.
2909 if (!Select->hasOneUse())
2910 return nullptr;
2911
2912 if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() ||
2913 FalseVal->getBitWidth() != ConstOffset->getBitWidth())
2914 return nullptr;
2915
2916 APInt NewTrueVal = *ConstOffset + *TrueVal;
2917 APInt NewFalseVal = *ConstOffset + *FalseVal;
2918 Constant *NewTrue = ConstantInt::get(Select->getType(), NewTrueVal);
2919 Constant *NewFalse = ConstantInt::get(Select->getType(), NewFalseVal);
2920 Value *NewSelect = Builder.CreateSelect(
2921 Cond, NewTrue, NewFalse, /*Name=*/"",
2922 /*MDFrom=*/(ProfcheckDisableMetadataFixes ? nullptr : Select));
2923 GEPNoWrapFlags Flags =
2925 return replaceInstUsesWith(GEP,
2926 Builder.CreateGEP(GEP.getResultElementType(),
2927 Src->getPointerOperand(),
2928 NewSelect, "", Flags));
2929 }
2930 }
2931
2932 // Find out whether the last index in the source GEP is a sequential idx.
2933 bool EndsWithSequential = false;
2934 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2935 I != E; ++I)
2936 EndsWithSequential = I.isSequential();
2937 if (!EndsWithSequential)
2938 return nullptr;
2939
2940 // Replace: gep (gep %P, long B), long A, ...
2941 // With: T = long A+B; gep %P, T, ...
2942 Value *SO1 = Src->getOperand(Src->getNumOperands() - 1);
2943 Value *GO1 = GEP.getOperand(1);
2944
2945 // If they aren't the same type, then the input hasn't been processed
2946 // by the loop above yet (which canonicalizes sequential index types to
2947 // intptr_t). Just avoid transforming this until the input has been
2948 // normalized.
2949 if (SO1->getType() != GO1->getType())
2950 return nullptr;
2951
2952 Value *Sum =
2953 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2954 // Only do the combine when we are sure the cost after the
2955 // merge is never more than that before the merge.
2956 if (Sum == nullptr)
2957 return nullptr;
2958
2960 Indices.append(Src->op_begin() + 1, Src->op_end() - 1);
2961 Indices.push_back(Sum);
2962 Indices.append(GEP.op_begin() + 2, GEP.op_end());
2963
2964 // Don't create GEPs with more than one non-zero index.
2965 unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
2966 auto *C = dyn_cast<Constant>(Idx);
2967 return !C || !C->isNullValue();
2968 });
2969 if (NumNonZeroIndices > 1)
2970 return nullptr;
2971
2972 return replaceInstUsesWith(
2973 GEP, Builder.CreateGEP(
2974 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2976}
2977
2980 bool &DoesConsume, unsigned Depth) {
2981 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2982 // ~(~(X)) -> X.
2983 Value *A, *B;
2984 if (match(V, m_Not(m_Value(A)))) {
2985 DoesConsume = true;
2986 return A;
2987 }
2988
2989 Constant *C;
2990 // Constants can be considered to be not'ed values.
2991 if (match(V, m_ImmConstant(C)))
2992 return ConstantExpr::getNot(C);
2993
2995 return nullptr;
2996
2997 // The rest of the cases require that we invert all uses so don't bother
2998 // doing the analysis if we know we can't use the result.
2999 if (!WillInvertAllUses)
3000 return nullptr;
3001
3002 // Compares can be inverted if all of their uses are being modified to use
3003 // the ~V.
3004 if (auto *I = dyn_cast<CmpInst>(V)) {
3005 if (Builder != nullptr)
3006 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
3007 I->getOperand(1));
3008 return NonNull;
3009 }
3010
3011 // If `V` is of the form `A + B` then `-1 - V` can be folded into
3012 // `(-1 - B) - A` if we are willing to invert all of the uses.
3013 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
3014 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3015 DoesConsume, Depth))
3016 return Builder ? Builder->CreateSub(BV, A) : NonNull;
3017 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3018 DoesConsume, Depth))
3019 return Builder ? Builder->CreateSub(AV, B) : NonNull;
3020 return nullptr;
3021 }
3022
3023 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
3024 // into `A ^ B` if we are willing to invert all of the uses.
3025 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
3026 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3027 DoesConsume, Depth))
3028 return Builder ? Builder->CreateXor(A, BV) : NonNull;
3029 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3030 DoesConsume, Depth))
3031 return Builder ? Builder->CreateXor(AV, B) : NonNull;
3032 return nullptr;
3033 }
3034
3035 // If `V` is of the form `A - B` then `-1 - V` can be folded into
3036 // `(-1 - A) + B` if we are willing to invert all of the uses.
3037 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
3038 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3039 DoesConsume, Depth))
3040 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
3041 return nullptr;
3042 }
3043
3044 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
3045 // into `A s>> B` if we are willing to invert all of the uses.
3046 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
3047 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3048 DoesConsume, Depth))
3049 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
3050 return nullptr;
3051 }
3052
3053 Value *Cond;
3054 // LogicOps are special in that we canonicalize them at the cost of an
3055 // instruction.
3056 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
3058 // Selects/min/max with invertible operands are freely invertible
3059 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
3060 bool LocalDoesConsume = DoesConsume;
3061 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
3062 LocalDoesConsume, Depth))
3063 return nullptr;
3064 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3065 LocalDoesConsume, Depth)) {
3066 DoesConsume = LocalDoesConsume;
3067 if (Builder != nullptr) {
3068 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3069 DoesConsume, Depth);
3070 assert(NotB != nullptr &&
3071 "Unable to build inverted value for known freely invertable op");
3072 if (auto *II = dyn_cast<IntrinsicInst>(V))
3073 return Builder->CreateBinaryIntrinsic(
3074 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
3075 return Builder->CreateSelect(
3076 Cond, NotA, NotB, "",
3078 }
3079 return NonNull;
3080 }
3081 }
3082
3083 if (PHINode *PN = dyn_cast<PHINode>(V)) {
3084 bool LocalDoesConsume = DoesConsume;
3086 for (Use &U : PN->operands()) {
3087 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
3088 Value *NewIncomingVal = getFreelyInvertedImpl(
3089 U.get(), /*WillInvertAllUses=*/false,
3090 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
3091 if (NewIncomingVal == nullptr)
3092 return nullptr;
3093 // Make sure that we can safely erase the original PHI node.
3094 if (NewIncomingVal == V)
3095 return nullptr;
3096 if (Builder != nullptr)
3097 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
3098 }
3099
3100 DoesConsume = LocalDoesConsume;
3101 if (Builder != nullptr) {
3103 Builder->SetInsertPoint(PN);
3104 PHINode *NewPN =
3105 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
3106 for (auto [Val, Pred] : IncomingValues)
3107 NewPN->addIncoming(Val, Pred);
3108 return NewPN;
3109 }
3110 return NonNull;
3111 }
3112
3113 if (match(V, m_SExtLike(m_Value(A)))) {
3114 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3115 DoesConsume, Depth))
3116 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
3117 return nullptr;
3118 }
3119
3120 if (match(V, m_Trunc(m_Value(A)))) {
3121 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3122 DoesConsume, Depth))
3123 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
3124 return nullptr;
3125 }
3126
3127 // De Morgan's Laws:
3128 // (~(A | B)) -> (~A & ~B)
3129 // (~(A & B)) -> (~A | ~B)
3130 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
3131 bool IsLogical, Value *A,
3132 Value *B) -> Value * {
3133 bool LocalDoesConsume = DoesConsume;
3134 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
3135 LocalDoesConsume, Depth))
3136 return nullptr;
3137 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3138 LocalDoesConsume, Depth)) {
3139 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3140 LocalDoesConsume, Depth);
3141 DoesConsume = LocalDoesConsume;
3142 if (IsLogical)
3143 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
3144 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
3145 }
3146
3147 return nullptr;
3148 };
3149
3150 if (match(V, m_Or(m_Value(A), m_Value(B))))
3151 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
3152 B);
3153
3154 if (match(V, m_And(m_Value(A), m_Value(B))))
3155 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
3156 B);
3157
3158 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
3159 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
3160 B);
3161
3162 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
3163 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
3164 B);
3165
3166 return nullptr;
3167}
3168
3169/// Return true if we should canonicalize the gep to an i8 ptradd.
3171 Value *PtrOp = GEP.getOperand(0);
3172 Type *GEPEltType = GEP.getSourceElementType();
3173 if (GEPEltType->isIntegerTy(8))
3174 return false;
3175
3176 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3177 // intrinsic. This has better support in BasicAA.
3178 if (GEPEltType->isScalableTy())
3179 return true;
3180
3181 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3182 // together.
3183 if (GEP.getNumIndices() == 1 &&
3184 match(GEP.getOperand(1),
3186 m_Shl(m_Value(), m_ConstantInt())))))
3187 return true;
3188
3189 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3190 // possibly be merged together.
3191 auto PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
3192 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3193 any_of(GEP.indices(), [](Value *V) {
3194 const APInt *C;
3195 return match(V, m_APInt(C)) && !C->isZero();
3196 });
3197}
3198
3200 IRBuilderBase &Builder) {
// Given a PHI node PN whose incoming values are all GEP instructions that are
// identical except for at most one operand, build a single GEP in GEP's block
// that computes the same pointer (using a new PHI for the differing operand if
// there is one). Returns the new GEP, or nullptr if the pattern does not match.
3201 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
3202 if (!Op1)
3203 return nullptr;
3204
3205 // Don't fold a GEP into itself through a PHI node. This can only happen
3206 // through the back-edge of a loop. Folding a GEP into itself means that
3207 // the value of the previous iteration needs to be stored in the meantime,
3208 // thus requiring an additional register variable to be live, but not
3209 // actually achieving anything (the GEP still needs to be executed once per
3210 // loop iteration).
3211 if (Op1 == &GEP)
3212 return nullptr;
// NW accumulates the no-wrap flags common to every incoming GEP; it is
// intersected with each further operand's flags in the loop below.
3213 GEPNoWrapFlags NW = Op1->getNoWrapFlags();
3214
// DI is the index of the single operand position at which the incoming GEPs
// differ, or -1 if no difference has been found.
3215 int DI = -1;
3216
// Compare every remaining PHI operand against the first one (Op1).
3217 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
3218 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
3219 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
3220 Op1->getSourceElementType() != Op2->getSourceElementType())
3221 return nullptr;
3222
3223 // As for Op1 above, don't try to fold a GEP into itself.
3224 if (Op2 == &GEP)
3225 return nullptr;
3226
3227 // Keep track of the type as we walk the GEP.
3228 Type *CurTy = nullptr;
3229
3230 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
// Operand types must agree position by position, even where the operand
// values themselves are allowed to differ.
3231 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
3232 return nullptr;
3233
3234 if (Op1->getOperand(J) != Op2->getOperand(J)) {
3235 if (DI == -1) {
3236 // We have not seen any differences in the GEPs feeding the PHI
3237 // yet, so we record this one if it is allowed to be a
3238 // variable.
3239
3240 // The first two arguments can vary for any GEP, the rest have to be
3241 // static for struct slots
3242 if (J > 1) {
3243 assert(CurTy && "No current type?");
3244 if (CurTy->isStructTy())
3245 return nullptr;
3246 }
3247
3248 DI = J;
3249 } else {
3250 // The GEP is different by more than one input. While this could be
3251 // extended to support GEPs that vary by more than one variable it
3252 // doesn't make sense since it greatly increases the complexity and
3253 // would result in an R+R+R addressing mode which no backend
3254 // directly supports and would need to be broken into several
3255 // simpler instructions anyway.
3256 return nullptr;
3257 }
3258 }
3259
3260 // Sink down a layer of the type for the next iteration.
3261 if (J > 0) {
3262 if (J == 1) {
3263 CurTy = Op1->getSourceElementType();
3264 } else {
3265 CurTy =
3266 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
3267 }
3268 }
3269 }
3270
3271 NW &= Op2->getNoWrapFlags();
3272 }
3273
3274 // If not all GEPs are identical we'll have to create a new PHI node.
3275 // Check that the old PHI node has only one use so that it will get
3276 // removed.
3277 if (DI != -1 && !PN->hasOneUse())
3278 return nullptr;
3279
// The result starts as a clone of the first incoming GEP, restricted to the
// no-wrap flags shared by all incoming GEPs.
3280 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
3281 NewGEP->setNoWrapFlags(NW);
3282
3283 if (DI == -1) {
3284 // All the GEPs feeding the PHI are identical. Clone one down into our
3285 // BB so that it can be merged with the current GEP.
3286 } else {
3287 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
3288 // into the current block so it can be merged, and create a new PHI to
3289 // set that index.
3290 PHINode *NewPN;
3291 {
// The guard restores the builder's insertion point when this scope ends;
// the new PHI itself is created at PN's position.
3292 IRBuilderBase::InsertPointGuard Guard(Builder);
3293 Builder.SetInsertPoint(PN);
3294 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
3295 PN->getNumOperands());
3296 }
3297
// Each incoming edge of the new PHI carries the differing operand of the
// GEP that arrived along that edge of the old PHI.
3298 for (auto &I : PN->operands())
3299 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
3300 PN->getIncomingBlock(I));
3301
3302 NewGEP->setOperand(DI, NewPN);
3303 }
3304
// Place the cloned GEP at the first insertion point of GEP's parent block.
3305 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
3306 return NewGEP;
3307}
3308
3310 Value *PtrOp = GEP.getOperand(0);
3311 SmallVector<Value *, 8> Indices(GEP.indices());
3312 Type *GEPType = GEP.getType();
3313 Type *GEPEltType = GEP.getSourceElementType();
3314 if (Value *V =
3315 simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.getNoWrapFlags(),
3316 SQ.getWithInstruction(&GEP)))
3317 return replaceInstUsesWith(GEP, V);
3318
3319 // For vector geps, use the generic demanded vector support.
3320 // Skip if GEP return type is scalable. The number of elements is unknown at
3321 // compile-time.
3322 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
3323 auto VWidth = GEPFVTy->getNumElements();
3324 APInt PoisonElts(VWidth, 0);
3325 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
3326 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
3327 PoisonElts)) {
3328 if (V != &GEP)
3329 return replaceInstUsesWith(GEP, V);
3330 return &GEP;
3331 }
3332 }
3333
3334 // Eliminate unneeded casts for indices, and replace indices which displace
3335 // by multiples of a zero size type with zero.
3336 bool MadeChange = false;
3337
3338 // Index width may not be the same width as pointer width.
3339 // Data layout chooses the right type based on supported integer types.
3340 Type *NewScalarIndexTy =
3341 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
3342
3344 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
3345 ++I, ++GTI) {
3346 // Skip indices into struct types.
3347 if (GTI.isStruct())
3348 continue;
3349
3350 Type *IndexTy = (*I)->getType();
3351 Type *NewIndexType =
3352 IndexTy->isVectorTy()
3353 ? VectorType::get(NewScalarIndexTy,
3354 cast<VectorType>(IndexTy)->getElementCount())
3355 : NewScalarIndexTy;
3356
3357 // If the element type has zero size then any index over it is equivalent
3358 // to an index of zero, so replace it with zero if it is not zero already.
3359 Type *EltTy = GTI.getIndexedType();
3360 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
3361 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
3362 *I = Constant::getNullValue(NewIndexType);
3363 MadeChange = true;
3364 }
3365
3366 if (IndexTy != NewIndexType) {
3367 // If we are using a wider index than needed for this platform, shrink
3368 // it to what we need. If narrower, sign-extend it to what we need.
3369 // This explicit cast can make subsequent optimizations more obvious.
3370 if (IndexTy->getScalarSizeInBits() <
3371 NewIndexType->getScalarSizeInBits()) {
3372 if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
3373 *I = Builder.CreateZExt(*I, NewIndexType, "", /*IsNonNeg=*/true);
3374 else
3375 *I = Builder.CreateSExt(*I, NewIndexType);
3376 } else {
3377 *I = Builder.CreateTrunc(*I, NewIndexType, "", GEP.hasNoUnsignedWrap(),
3378 GEP.hasNoUnsignedSignedWrap());
3379 }
3380 MadeChange = true;
3381 }
3382 }
3383 if (MadeChange)
3384 return &GEP;
3385
3386 // Canonicalize constant GEPs to i8 type.
3387 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
3388 APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
3389 if (GEP.accumulateConstantOffset(DL, Offset))
3390 return replaceInstUsesWith(
3391 GEP, Builder.CreatePtrAdd(PtrOp, Builder.getInt(Offset), "",
3392 GEP.getNoWrapFlags()));
3393 }
3394
3396 Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
3397 Value *NewGEP =
3398 Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags());
3399 return replaceInstUsesWith(GEP, NewGEP);
3400 }
3401
3402 // Strip trailing zero indices.
3403 auto *LastIdx = dyn_cast<Constant>(Indices.back());
3404 if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
3405 return replaceInstUsesWith(
3406 GEP, Builder.CreateGEP(GEP.getSourceElementType(), PtrOp,
3407 drop_end(Indices), "", GEP.getNoWrapFlags()));
3408 }
3409
3410 // Strip leading zero indices.
3411 auto *FirstIdx = dyn_cast<Constant>(Indices.front());
3412 if (FirstIdx && FirstIdx->isNullValue() &&
3413 !FirstIdx->getType()->isVectorTy()) {
3415 ++GTI;
3416 if (!GTI.isStruct() && GTI.getSequentialElementStride(DL) ==
3417 DL.getTypeAllocSize(GTI.getIndexedType()))
3418 return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
3419 GEP.getPointerOperand(),
3420 drop_begin(Indices), "",
3421 GEP.getNoWrapFlags()));
3422 }
3423
3424 // Scalarize vector operands; prefer splat-of-gep as canonical form.
3425 // Note that this loses information about undef lanes; we run it after
3426 // demanded bits to partially mitigate that loss.
3427 if (GEPType->isVectorTy() && llvm::any_of(GEP.operands(), [](Value *Op) {
3428 return Op->getType()->isVectorTy() && getSplatValue(Op);
3429 })) {
3430 SmallVector<Value *> NewOps;
3431 for (auto &Op : GEP.operands()) {
3432 if (Op->getType()->isVectorTy())
3433 if (Value *Scalar = getSplatValue(Op)) {
3434 NewOps.push_back(Scalar);
3435 continue;
3436 }
3437 NewOps.push_back(Op);
3438 }
3439
3440 Value *Res = Builder.CreateGEP(GEP.getSourceElementType(), NewOps[0],
3441 ArrayRef(NewOps).drop_front(), GEP.getName(),
3442 GEP.getNoWrapFlags());
3443 if (!Res->getType()->isVectorTy()) {
3444 ElementCount EC = cast<VectorType>(GEPType)->getElementCount();
3445 Res = Builder.CreateVectorSplat(EC, Res);
3446 }
3447 return replaceInstUsesWith(GEP, Res);
3448 }
3449
3450 bool SeenNonZeroIndex = false;
3451 for (auto [IdxNum, Idx] : enumerate(Indices)) {
3452 // Ignore one leading zero index.
3453 auto *C = dyn_cast<Constant>(Idx);
3454 if (C && C->isNullValue() && IdxNum == 0)
3455 continue;
3456
3457 if (!SeenNonZeroIndex) {
3458 SeenNonZeroIndex = true;
3459 continue;
3460 }
3461
3462 // GEP has multiple non-zero indices: Split it.
3463 ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
3464 Value *FrontGEP =
3465 Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,
3466 GEP.getName() + ".split", GEP.getNoWrapFlags());
3467
3468 SmallVector<Value *> BackIndices;
3469 BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy));
3470 append_range(BackIndices, drop_begin(Indices, IdxNum));
3472 GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP,
3473 BackIndices, GEP.getNoWrapFlags());
3474 }
3475
3476 // Canonicalize gep %T to gep [sizeof(%T) x i8]:
3477 auto IsCanonicalType = [](Type *Ty) {
3478 if (auto *AT = dyn_cast<ArrayType>(Ty))
3479 Ty = AT->getElementType();
3480 return Ty->isIntegerTy(8);
3481 };
3482 if (Indices.size() == 1 && !IsCanonicalType(GEPEltType)) {
3483 TypeSize Scale = DL.getTypeAllocSize(GEPEltType);
3484 assert(!Scale.isScalable() && "Should have been handled earlier");
3485 Type *NewElemTy = Builder.getInt8Ty();
3486 if (Scale.getFixedValue() != 1)
3487 NewElemTy = ArrayType::get(NewElemTy, Scale.getFixedValue());
3488 GEP.setSourceElementType(NewElemTy);
3489 GEP.setResultElementType(NewElemTy);
3490 // Don't bother revisiting the GEP after this change.
3491 MadeIRChange = true;
3492 }
3493
3494 // Check to see if the inputs to the PHI node are getelementptr instructions.
3495 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
3496 if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
3497 return replaceOperand(GEP, 0, NewPtrOp);
3498 }
3499
3500 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
3501 if (Instruction *I = visitGEPOfGEP(GEP, Src))
3502 return I;
3503
3504 if (GEP.getNumIndices() == 1) {
3505 unsigned AS = GEP.getPointerAddressSpace();
3506 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
3507 DL.getIndexSizeInBits(AS)) {
3508 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
3509
3510 if (TyAllocSize == 1) {
3511 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
3512 // but only if the result pointer is only used as if it were an integer.
3513 // (The case where the underlying object is the same is handled by
3514 // InstSimplify.)
3515 Value *X = GEP.getPointerOperand();
3516 Value *Y;
3517 if (match(GEP.getOperand(1), m_Sub(m_PtrToIntOrAddr(m_Value(Y)),
3519 GEPType == Y->getType()) {
3520 bool HasNonAddressBits =
3521 DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
3522 bool Changed = GEP.replaceUsesWithIf(Y, [&](Use &U) {
3523 return isa<PtrToAddrInst, ICmpInst>(U.getUser()) ||
3524 (!HasNonAddressBits && isa<PtrToIntInst>(U.getUser()));
3525 });
3526 return Changed ? &GEP : nullptr;
3527 }
3528 } else if (auto *ExactIns =
3529 dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
3530 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
3531 Value *V;
3532 if (ExactIns->isExact()) {
3533 if ((has_single_bit(TyAllocSize) &&
3534 match(GEP.getOperand(1),
3535 m_Shr(m_Value(V),
3536 m_SpecificInt(countr_zero(TyAllocSize))))) ||
3537 match(GEP.getOperand(1),
3538 m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
3539 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3540 GEP.getPointerOperand(), V,
3541 GEP.getNoWrapFlags());
3542 }
3543 }
3544 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
3545 // Try to canonicalize non-i8 element type to i8 if the index is an
3546 // exact instruction. If the index is an exact instruction (div/shr)
3547 // with a constant RHS, we can fold the non-i8 element scale into the
3548 // div/shr (similar to the mul case, just inverted).
3549 const APInt *C;
3550 std::optional<APInt> NewC;
3551 if (has_single_bit(TyAllocSize) &&
3552 match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
3553 C->uge(countr_zero(TyAllocSize)))
3554 NewC = *C - countr_zero(TyAllocSize);
3555 else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
3556 APInt Quot;
3557 uint64_t Rem;
3558 APInt::udivrem(*C, TyAllocSize, Quot, Rem);
3559 if (Rem == 0)
3560 NewC = Quot;
3561 } else if (match(ExactIns, m_SDiv(m_Value(V), m_APInt(C)))) {
3562 APInt Quot;
3563 int64_t Rem;
3564 APInt::sdivrem(*C, TyAllocSize, Quot, Rem);
3565 // For sdiv we need to make sure we aren't creating INT_MIN / -1.
3566 if (!Quot.isAllOnes() && Rem == 0)
3567 NewC = Quot;
3568 }
3569
3570 if (NewC.has_value()) {
3571 Value *NewOp = Builder.CreateBinOp(
3572 static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), V,
3573 ConstantInt::get(V->getType(), *NewC));
3574 cast<BinaryOperator>(NewOp)->setIsExact();
3575 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3576 GEP.getPointerOperand(), NewOp,
3577 GEP.getNoWrapFlags());
3578 }
3579 }
3580 }
3581 }
3582 }
3583 // We do not handle pointer-vector geps here.
3584 if (GEPType->isVectorTy())
3585 return nullptr;
3586
3587 if (!GEP.isInBounds()) {
3588 unsigned IdxWidth =
3589 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
3590 APInt BasePtrOffset(IdxWidth, 0);
3591 Value *UnderlyingPtrOp =
3592 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset);
3593 bool CanBeNull, CanBeFreed;
3594 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3595 DL, CanBeNull, CanBeFreed);
3596 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
3597 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
3598 BasePtrOffset.isNonNegative()) {
3599 APInt AllocSize(IdxWidth, DerefBytes);
3600 if (BasePtrOffset.ule(AllocSize)) {
3602 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
3603 }
3604 }
3605 }
3606 }
3607
3608 // nusw + nneg -> nuw
3609 if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
3610 all_of(GEP.indices(), [&](Value *Idx) {
3611 return isKnownNonNegative(Idx, SQ.getWithInstruction(&GEP));
3612 })) {
3613 GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
3614 return &GEP;
3615 }
3616
3617 // These rewrites are trying to preserve inbounds/nuw attributes. So we want
3618 // to do this after having tried to derive "nuw" above.
3619 if (GEP.getNumIndices() == 1) {
3620 // Given (gep p, x+y) we want to determine the common nowrap flags for both
3621 // geps if transforming into (gep (gep p, x), y).
3622 auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
3623 // We can preserve both "inbounds nuw", "nusw nuw" and "nuw" if we know
3624 // that x + y does not have unsigned wrap.
3625 if (GEP.hasNoUnsignedWrap() && AddIsNUW)
3626 return GEP.getNoWrapFlags();
3627 return GEPNoWrapFlags::none();
3628 };
3629
3630 // Try to replace ADD + GEP with GEP + GEP.
3631 Value *Idx1, *Idx2;
3632 if (match(GEP.getOperand(1),
3633 m_OneUse(m_AddLike(m_Value(Idx1), m_Value(Idx2))))) {
3634 // %idx = add i64 %idx1, %idx2
3635 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3636 // as:
3637 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3638 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3639 bool NUW = match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()));
3640 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3641 auto *NewPtr =
3642 Builder.CreateGEP(GEP.getSourceElementType(), GEP.getPointerOperand(),
3643 Idx1, "", NWFlags);
3644 return replaceInstUsesWith(GEP,
3645 Builder.CreateGEP(GEP.getSourceElementType(),
3646 NewPtr, Idx2, "", NWFlags));
3647 }
3648 ConstantInt *C;
3649 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAddLike(
3650 m_Value(Idx1), m_ConstantInt(C))))))) {
3651 // %add = add nsw i32 %idx1, idx2
3652 // %sidx = sext i32 %add to i64
3653 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3654 // as:
3655 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3656 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
3657 bool NUW = match(GEP.getOperand(1),
3659 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3660 auto *NewPtr = Builder.CreateGEP(
3661 GEP.getSourceElementType(), GEP.getPointerOperand(),
3662 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()), "", NWFlags);
3663 return replaceInstUsesWith(
3664 GEP,
3665 Builder.CreateGEP(GEP.getSourceElementType(), NewPtr,
3666 Builder.CreateSExt(C, GEP.getOperand(1)->getType()),
3667 "", NWFlags));
3668 }
3669 }
3670
3672 return R;
3673
3674 // srem -> (and/urem) for inbounds+nuw GEP
3675 if (Indices.size() == 1 && GEP.isInBounds() && GEP.hasNoUnsignedWrap()) {
3676 Value *X, *Y;
3677
3678 // Match: idx = srem X, Y -- where Y is a power-of-two value.
3679 if (match(Indices[0], m_OneUse(m_SRem(m_Value(X), m_Value(Y)))) &&
3680 isKnownToBeAPowerOfTwo(Y, /*OrZero=*/true, &GEP)) {
3681 // If GEP is inbounds+nuw, the offset cannot be negative
3682 // -> srem by power-of-two can be treated as urem,
3683 // and urem by power-of-two folds to 'and' later.
3684 // OrZero=true is fine here because division by zero is UB.
3685 Instruction *OldIdxI = cast<Instruction>(Indices[0]);
3686 Value *NewIdx = Builder.CreateURem(X, Y, OldIdxI->getName());
3687
3688 return GetElementPtrInst::Create(GEPEltType, PtrOp, {NewIdx},
3689 GEP.getNoWrapFlags());
3690 }
3691 }
3692
3693 return nullptr;
3694}
3695
// Reports whether the value V may be treated as never equal to the
// allocation AI (which has not escaped) in an equality comparison.
// NOTE(review): listing lines 3696 and 3698 are absent from this copy, i.e.
// the opening of the signature and the condition guarding the first
// `return true`; restore them from upstream before relying on this text.
3697 Instruction *AI) {
3699 return true;
// A load qualifies only when it reads directly from a global variable.
3700 if (auto *LI = dyn_cast<LoadInst>(V))
3701 return isa<GlobalVariable>(LI->getPointerOperand());
3702 // Two distinct allocations will never be equal.
3703 return isAllocLikeFn(V, &TLI) && V != AI;
3704 }
3705
3706/// Given a call CB which uses an address UsedV, return true if we can prove the
3707/// call's only possible effect is storing to V.
3708static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3709 const TargetLibraryInfo &TLI) {
3710 if (!CB.use_empty())
3711 // TODO: add recursion if returned attribute is present
3712 return false;
3713
3714 if (CB.isTerminator())
3715 // TODO: remove implementation restriction
3716 return false;
3717
3718 if (!CB.willReturn() || !CB.doesNotThrow())
3719 return false;
3720
3721 // If the only possible side effect of the call is writing to the alloca,
3722 // and the result isn't used, we can safely remove any reads implied by the
3723 // call including those which might read the alloca itself.
3724 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
3725 return Dest && Dest->Ptr == UsedV;
3726}
3727
// Walks the users of the allocation AI (following casts, GEPs, the
// invariant-group intrinsics and matching reallocations) and decides whether
// the allocation site can be removed.
//
// Returns std::nullopt as soon as a user is found that cannot be handled, or
// once Users has reached MaxAllocSiteRemovableUsers entries. Otherwise every
// accepted user has been appended to Users and the accumulated Access value
// is returned.
//
// NOTE(review): several lines of this listing are absent (embedded numbers
// 3729, 3731, 3734, 3739, 3790, 3798, 3841, 3849, 3854, 3865 and 3874). They
// include the rest of the signature and the declarations of Worklist, Access,
// I, II, MI and SI; restore them from upstream before relying on this text.
3728 static std::optional<ModRefInfo>
3730 const TargetLibraryInfo &TLI, bool KnowInit) {
3732 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
3733 Worklist.push_back(AI);
3735
3736 do {
3737 Instruction *PI = Worklist.pop_back_val();
3738 for (User *U : PI->users()) {
3740 if (Users.size() >= MaxAllocSiteRemovableUsers)
3741 return std::nullopt;
3742 switch (I->getOpcode()) {
3743 default:
3744 // Give up the moment we see something we can't handle.
3745 return std::nullopt;
3746
// Pointer-preserving instructions: record them and also visit their users.
3747 case Instruction::AddrSpaceCast:
3748 case Instruction::BitCast:
3749 case Instruction::GetElementPtr:
3750 Users.emplace_back(I);
3751 Worklist.push_back(I);
3752 continue;
3753
3754 case Instruction::ICmp: {
3755 ICmpInst *ICI = cast<ICmpInst>(I);
3756 // We can fold eq/ne comparisons with null to false/true, respectively.
3757 // We also fold comparisons in some conditions provided the alloc has
3758 // not escaped (see isNeverEqualToUnescapedAlloc).
3759 if (!ICI->isEquality())
3760 return std::nullopt;
// Pick the operand of the compare that is not the pointer being followed.
3761 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3762 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3763 return std::nullopt;
3764
3765 // Do not fold compares to aligned_alloc calls, as they may have to
3766 // return null in case the required alignment cannot be satisfied,
3767 // unless we can prove that both alignment and size are valid.
3768 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3769 // Check if alignment and size of a call to aligned_alloc is valid,
3770 // that is alignment is a power-of-2 and the size is a multiple of the
3771 // alignment.
3772 const APInt *Alignment;
3773 const APInt *Size;
3774 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3775 match(CB->getArgOperand(1), m_APInt(Size)) &&
3776 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3777 };
3778 auto *CB = dyn_cast<CallBase>(AI);
3779 LibFunc TheLibFunc;
// NOTE(review): CB->getCalledFunction() is dereferenced without a null
// check; presumably AI is always a direct call here -- confirm.
3780 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3781 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3782 !AlignmentAndSizeKnownValid(CB))
3783 return std::nullopt;
3784 Users.emplace_back(I);
3785 continue;
3786 }
3787
3788 case Instruction::Call:
3789 // Ignore no-op and store intrinsics.
3791 switch (II->getIntrinsicID()) {
3792 default:
3793 return std::nullopt;
3794
3795 case Intrinsic::memmove:
3796 case Intrinsic::memcpy:
3797 case Intrinsic::memset: {
3799 if (MI->isVolatile())
3800 return std::nullopt;
3801 // Note: this could also be ModRef, but we can still interpret that
3802 // as just Mod in that case.
3803 ModRefInfo NewAccess =
3804 MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
// Give up if the accesses recorded so far contain a kind other than
// NewAccess, so that Access never mixes Mod and Ref.
3805 if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
3806 return std::nullopt;
3807 Access |= NewAccess;
3808 [[fallthrough]];
3809 }
3810 case Intrinsic::assume:
3811 case Intrinsic::invariant_start:
3812 case Intrinsic::invariant_end:
3813 case Intrinsic::lifetime_start:
3814 case Intrinsic::lifetime_end:
3815 case Intrinsic::objectsize:
3816 Users.emplace_back(I);
3817 continue;
// These intrinsics are followed like casts: their users are visited as well.
3818 case Intrinsic::launder_invariant_group:
3819 case Intrinsic::strip_invariant_group:
3820 Users.emplace_back(I);
3821 Worklist.push_back(I);
3822 continue;
3823 }
3824 }
3825
// A call that frees PI and belongs to the same allocation family as AI.
3826 if (Family && getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3827 getAllocationFamily(I, &TLI) == Family) {
3828 Users.emplace_back(I);
3829 continue;
3830 }
3831
// A reallocation of PI in the same family: its result is followed too.
3832 if (Family && getReallocatedOperand(cast<CallBase>(I)) == PI &&
3833 getAllocationFamily(I, &TLI) == Family) {
3834 Users.emplace_back(I);
3835 Worklist.push_back(I);
3836 continue;
3837 }
3838
// A call accepted by isRemovableWrite is tolerated only while no read has
// been recorded.
3839 if (!isRefSet(Access) &&
3840 isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3842 Users.emplace_back(I);
3843 continue;
3844 }
3845
3846 return std::nullopt;
3847
3848 case Instruction::Store: {
// The store must be non-volatile and must write *to* PI; storing PI itself
// somewhere else is rejected, as is any store once a read was recorded.
3850 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3851 return std::nullopt;
3852 if (isRefSet(Access))
3853 return std::nullopt;
3855 Users.emplace_back(I);
3856 continue;
3857 }
3858
3859 case Instruction::Load: {
3860 LoadInst *LI = cast<LoadInst>(I);
// Mirror image of the store case: a load is rejected once a write was
// recorded.
3861 if (LI->isVolatile() || LI->getPointerOperand() != PI)
3862 return std::nullopt;
3863 if (isModSet(Access))
3864 return std::nullopt;
3866 Users.emplace_back(I);
3867 continue;
3868 }
3869 }
3870 llvm_unreachable("missing a return?");
3871 }
3872 } while (!Worklist.empty());
3873
3875 return Access;
3876 }
3877
3880
// Body of the visitor that tries to delete the allocation MI together with
// all of its users. Returns the result of eraseInstFromFunction(MI) when the
// site was removed and nullptr otherwise.
// NOTE(review): the signature and a number of statements are absent from
// this listing (embedded numbers 3878-3879, 3895, 3899, 3911, 3935, 3937,
// 3945, 3951, 3966, 3969, 3974, 3978, 3985, 3987 and 3990), among them the
// declarations of RawUsers, DVRs, I, II and Replace; restore them from
// upstream before relying on this text.
3881 // If we have a malloc call which is only used in any amount of comparisons to
3882 // null and free calls, delete the calls and replace the comparisons with true
3883 // or false as appropriate.
3884
3885 // This is based on the principle that we can substitute our own allocation
3886 // function (which will never return null) rather than knowledge of the
3887 // specific function being called. In some sense this can change the permitted
3888 // outputs of a program (when we convert a malloc to an alloca, the fact that
3889 // the allocation is now on the stack is potentially visible, for example),
3890 // but we believe in a permissible manner.
3891 //
3892 // Collect into Instruction* first to avoid expensive WeakTrackingVH
3893 // register/unregister overhead; convert to WeakTrackingVH only when the
3894 // site is actually removable.
3896
3897 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3898 // before each store.
3900 std::unique_ptr<DIBuilder> DIB;
3901 if (isa<AllocaInst>(MI)) {
3902 findDbgUsers(&MI, DVRs);
3903 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3904 }
3905
3906 // Determine what getInitialValueOfAllocation would return without actually
3907 // allocating the result.
3908 bool KnowInitUndef = false;
3909 bool KnowInitZero = false;
3910 Constant *Init =
3912 if (Init) {
3913 if (isa<UndefValue>(Init))
3914 KnowInitUndef = true;
3915 else if (Init->isNullValue())
3916 KnowInitZero = true;
3917 }
3918 // The various sanitizers don't actually return undef memory, but rather
3919 // memory initialized with special forms of runtime poison
3920 auto &F = *MI.getFunction();
3921 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
3922 F.hasFnAttribute(Attribute::SanitizeAddress))
3923 KnowInitUndef = false;
3924
3925 auto Removable =
3926 isAllocSiteRemovable(&MI, RawUsers, TLI, KnowInitZero | KnowInitUndef);
3927 if (Removable) {
3928 SmallVector<WeakTrackingVH, 64> Users(RawUsers.begin(), RawUsers.end());
// First pass over the users: handle objectsize calls and memory transfers.
3929 for (WeakTrackingVH &User : Users) {
3930 // Lowering all @llvm.objectsize and MTI calls first because they may use
3931 // a bitcast/GEP of the alloca we are removing.
3932 if (!User)
3933 continue;
3934
3936
3938 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3939 SmallVector<Instruction *> InsertedInstructions;
3940 Value *Result = lowerObjectSizeCall(
3941 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
3942 for (Instruction *Inserted : InsertedInstructions)
3943 Worklist.add(Inserted);
3944 replaceInstUsesWith(*I, Result);
3946 User = nullptr; // Skip examining in the next loop.
3947 continue;
3948 }
3949 if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
// When the allocation is known to be zero-initialized and is only read, a
// memset of zero bytes to the transfer's destination is created in front
// of the transfer.
3950 if (KnowInitZero && isRefSet(*Removable)) {
3952 Builder.SetInsertPoint(MTI);
3953 auto *M = Builder.CreateMemSet(
3954 MTI->getRawDest(),
3955 ConstantInt::get(Type::getInt8Ty(MI.getContext()), 0),
3956 MTI->getLength(), MTI->getDestAlign());
3957 M->copyMetadata(*MTI);
3958 }
3959 }
3960 }
3961 }
// Second pass over the users; entries nulled by the first pass are skipped.
3962 for (WeakTrackingVH &User : Users) {
3963 if (!User)
3964 continue;
3965
3967
3968 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
// isFalseWhenEqual() is true for `ne` and false for `eq`, i.e. the compare
// is given the value it has when its operands differ.
3970 *C, ConstantInt::get(C->getType(), C->isFalseWhenEqual()));
3971 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3972 for (auto *DVR : DVRs)
3973 if (DVR->isAddressOfVariable())
3975 } else {
3976 // Casts, GEP, or anything else: we're about to delete this instruction,
3977 // so it can not have any valid uses.
3979 if (isa<LoadInst>(I)) {
// A load yields the known initial contents: undef or all-zero.
3980 assert(KnowInitZero || KnowInitUndef);
3981 Replace = KnowInitUndef ? UndefValue::get(I->getType())
3982 : Constant::getNullValue(I->getType());
3983 } else
3984 Replace = PoisonValue::get(I->getType());
3986 }
3988 }
3989
3991 // Replace invoke with a NOP intrinsic to maintain the original CFG
3992 Module *M = II->getModule();
3993 Function *F = Intrinsic::getOrInsertDeclaration(M, Intrinsic::donothing);
3994 auto *NewII = InvokeInst::Create(
3995 F, II->getNormalDest(), II->getUnwindDest(), {}, "", II->getParent());
3996 NewII->setDebugLoc(II->getDebugLoc());
3997 }
3998
3999 // Remove debug intrinsics which describe the value contained within the
4000 // alloca. In addition to removing dbg.{declare,addr} which simply point to
4001 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
4002 //
4003 // ```
4004 // define void @foo(i32 %0) {
4005 // %a = alloca i32 ; Deleted.
4006 // store i32 %0, i32* %a
4007 // dbg.value(i32 %0, "arg0") ; Not deleted.
4008 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
4009 // call void @trivially_inlinable_no_op(i32* %a)
4010 // ret void
4011 // }
4012 // ```
4013 //
4014 // This may not be required if we stop describing the contents of allocas
4015 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
4016 // the LowerDbgDeclare utility.
4017 //
4018 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
4019 // "arg0" dbg.value may be stale after the call. However, failing to remove
4020 // the DW_OP_deref dbg.value causes large gaps in location coverage.
4021 //
4022 // FIXME: the Assignment Tracking project has now likely made this
4023 // redundant (and it's sometimes harmful).
4024 for (auto *DVR : DVRs)
4025 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
4026 DVR->eraseFromParent();
4027
4028 return eraseInstFromFunction(MI);
4029 }
4030 return nullptr;
4031 }
4032
4033 /// Move the call to free before a NULL test.
4034 ///
4035 /// Check if this free is accessed after its argument has been tested
4036 /// against NULL (property 0).
4037 /// If yes, it is legal to move this call into its predecessor block.
4038 ///
4039 /// The move is performed only if the block containing the call to free
4040 /// will be removed, i.e.:
4041 /// 1. it has only one predecessor P, and P has two successors
4042 /// 2. it contains the call, noops, and an unconditional branch
4043 /// 3. its successor is the same as its predecessor's successor
4044 ///
4045 /// Profitability is not a concern here; this function should
4046 /// be called only if the caller knows this transformation would be
4047 /// profitable (e.g., for code size).
///
/// Returns &FI when the move was performed and nullptr otherwise.
///
/// NOTE(review): listing lines 4048, 4075 and 4087 are absent from this copy
/// (the opening of the signature, one operand of the no-op filter and one
/// line of the branch pattern); restore them from upstream.
4049 const DataLayout &DL) {
4050 Value *Op = FI.getArgOperand(0);
4051 BasicBlock *FreeInstrBB = FI.getParent();
4052 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
4053
4054 // Validate part of constraint #1: Only one predecessor
4055 // FIXME: We can extend the number of predecessor, but in that case, we
4056 // would duplicate the call to free in each predecessor and it may
4057 // not be profitable even for code size.
4058 if (!PredBB)
4059 return nullptr;
4060
4061 // Validate constraint #2: Does this block contain only the call to
4062 // free, noops, and an unconditional branch?
4063 BasicBlock *SuccBB;
4064 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
4065 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
4066 return nullptr;
4067
4068 // If there are only 2 instructions in the block, at this point,
4069 // these are the call to free and the unconditional branch.
4070 // If there are more than 2 instructions, check that they are noops
4071 // i.e., they won't hurt the performance of the generated code.
4072 if (FreeInstrBB->size() != 2) {
4073 for (const Instruction &Inst : *FreeInstrBB) {
4074 if (&Inst == &FI || &Inst == FreeInstrBBTerminator ||
4076 continue;
// Anything else must be a cast that is a no-op under this data layout.
4077 auto *Cast = dyn_cast<CastInst>(&Inst);
4078 if (!Cast || !Cast->isNoopCast(DL))
4079 return nullptr;
4080 }
4081 }
4082 // Validate the rest of constraint #1 by matching on the pred branch.
4083 Instruction *TI = PredBB->getTerminator();
4084 BasicBlock *TrueBB, *FalseBB;
4085 CmpPredicate Pred;
4086 if (!match(TI, m_Br(m_ICmp(Pred,
4088 m_Specific(Op->stripPointerCasts())),
4089 m_Zero()),
4090 TrueBB, FalseBB)))
4091 return nullptr;
4092 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
4093 return nullptr;
4094
4095 // Validate constraint #3: Ensure the null case just falls through.
4096 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
4097 return nullptr;
4098 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
4099 "Broken CFG: missing edge from predecessor to successor");
4100
4101 // At this point, we know that everything in FreeInstrBB can be moved
4102 // before TI.
4103 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
4104 if (&Instr == FreeInstrBBTerminator)
4105 break;
4106 Instr.moveBeforePreserving(TI->getIterator());
4107 }
4108 assert(FreeInstrBB->size() == 1 &&
4109 "Only the branch instruction should remain");
4110
4111 // Now that we've moved the call to free before the NULL check, we have to
4112 // remove any attributes on its parameter that imply it's non-null, because
4113 // those attributes might have only been valid because of the NULL check, and
4114 // we can get miscompiles if we keep them. This is conservative if non-null is
4115 // also implied by something other than the NULL check, but it's guaranteed to
4116 // be correct, and the conservativeness won't matter in practice, since the
4117 // attributes are irrelevant for the call to free itself and the pointer
4118 // shouldn't be used after the call.
4119 AttributeList Attrs = FI.getAttributes();
4120 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
// dereferenceable(N) is replaced by dereferenceable_or_null(N) with the same
// byte count.
4121 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
4122 if (Dereferenceable.isValid()) {
4123 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
4124 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
4125 Attribute::Dereferenceable);
4126 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
4127 }
4128 FI.setAttributes(Attrs);
4129
4130 return &FI;
4131 }
4132
// Body of the visitor for a call FI that frees the pointer Op. It erases the
// call for an undef operand, folds free(realloc(...)), and, when MinimizeSize
// is set and the callee is the library `free`, tries a further transform
// whose call is not visible here. Returns nullptr when nothing applied.
// NOTE(review): listing lines 4133, 4137, 4143, 4148 and 4166 are absent from
// this copy (the signature, the marker creation, the `free null` condition,
// the definition of CI and the statement producing I); restore them from
// upstream before relying on this text.
4134 // free undef -> unreachable.
4135 if (isa<UndefValue>(Op)) {
4136 // Leave a marker since we can't modify the CFG here.
4138 return eraseInstFromFunction(FI);
4139 }
4140
4141 // If we have 'free null' delete the instruction. This can happen in stl code
4142 // when lots of inlining happens.
4144 return eraseInstFromFunction(FI);
4145
4146 // If we had free(realloc(...)) with no intervening uses, then eliminate the
4147 // realloc() entirely.
4149 if (CI && CI->hasOneUse())
4150 if (Value *ReallocatedOp = getReallocatedOperand(CI))
4151 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
4152
4153 // If we optimize for code size, try to move the call to free before the null
4154 // test so that simplify cfg can remove the empty block and dead code
4155 // elimination the branch. I.e., helps to turn something like:
4156 // if (foo) free(foo);
4157 // into
4158 // free(foo);
4159 //
4160 // Note that we can only do this for 'free' and not for any flavor of
4161 // 'operator delete'; there is no 'operator delete' symbol for which we are
4162 // permitted to invent a call, even if we're passing in a null pointer.
4163 if (MinimizeSize) {
4164 LibFunc Func;
4165 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
4167 return I;
4168 }
4169
4170 return nullptr;
4171 }
4172
// Body of the visitor for the return instruction RI. It uses the return
// attributes of the enclosing function to simplify the returned value:
// nonnull / dereferenceable for pointer returns, and nofpclass for types
// accepted by AttributeFuncs::isNoFPClassCompatibleType. Returns nullptr
// when nothing changed.
// NOTE(review): listing lines 4173 and 4185 are absent from this copy (the
// signature and the second half of the dereferenceable condition); restore
// them from upstream before relying on this text.
4174 Value *RetVal = RI.getReturnValue();
// Nothing to simplify for `ret void`.
4175 if (!RetVal)
4176 return nullptr;
4177
4178 Function *F = RI.getFunction();
4179 Type *RetTy = RetVal->getType();
4180 if (RetTy->isPointerTy()) {
4181 bool HasDereferenceable =
4182 F->getAttributes().getRetDereferenceableBytes() > 0;
4183 if (F->hasRetAttribute(Attribute::NonNull) ||
4184 (HasDereferenceable &&
4186 if (Value *V = simplifyNonNullOperand(RetVal, HasDereferenceable))
4187 return replaceOperand(RI, 0, V);
4188 }
4189 }
4190
4191 if (!AttributeFuncs::isNoFPClassCompatibleType(RetTy))
4192 return nullptr;
4193
4194 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
4195 if (ReturnClass == fcNone)
4196 return nullptr;
4197
// Only the classes not excluded by the return's nofpclass are demanded.
4198 KnownFPClass KnownClass;
4199 if (SimplifyDemandedFPClass(&RI, 0, ~ReturnClass, KnownClass,
4200 SQ.getWithInstruction(&RI)))
4201 return &RI;
4202
4203 return nullptr;
4204 }
4205
4206 // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
// Erases instructions that immediately precede I, walking backwards until
// one is reached that must be kept. Returns true if anything was erased.
// NOTE(review): listing lines 4207 and 4220 are absent from this copy (the
// signature and the condition guarding the second `break`); restore them
// from upstream before relying on this text.
4208 // Try to remove the previous instruction if it must lead to unreachable.
4209 // This includes instructions like stores and "llvm.assume" that may not get
4210 // removed by simple dead code elimination.
4211 bool Changed = false;
4212 while (Instruction *Prev = I.getPrevNode()) {
4213 // While we theoretically can erase EH, that would result in a block that
4214 // used to start with an EH no longer starting with EH, which is invalid.
4215 // To make it valid, we'd need to fixup predecessors to no longer refer to
4216 // this block, but that changes CFG, which is not allowed in InstCombine.
4217 if (Prev->isEHPad())
4218 break; // Can not drop any more instructions. We're done here.
4219
4221 break; // Can not drop any more instructions. We're done here.
4222 // Otherwise, this instruction can be freely erased,
4223 // even if it is not side-effect free.
4224
4225 // A value may still have uses before we process it here (for example, in
4226 // another unreachable block), so convert those to poison.
4227 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
4228 eraseInstFromFunction(*Prev);
4229 Changed = true;
4230 }
4231 return Changed;
4232 }
4233
4238
// Body of the visitor for the unconditional branch BI. Returns &BI when the
// store preceding the branch was handled by the (not visible) sinking step
// and nullptr otherwise.
// NOTE(review): listing lines 4239 and 4255 are absent from this copy (the
// signature and the condition that acts on the store that was found);
// restore them from upstream before relying on this text.
4240 // If this store is the second-to-last instruction in the basic block
4241 // (excluding debug info) and if the block ends with
4242 // an unconditional branch, try to move the store to the successor block.
4243
// Steps backwards from BBI past debug/pseudo instructions and returns the
// instruction reached if it is a store, or nullptr otherwise.
4244 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4245 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4246 do {
4247 if (BBI != FirstInstr)
4248 --BBI;
4249 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4250
4251 return dyn_cast<StoreInst>(BBI);
4252 };
4253
4254 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4256 return &BI;
4257
4258 return nullptr;
4259 }
4260
// Records the CFG edge From -> To in DeadEdges. If the edge was already
// recorded nothing else happens. Otherwise every phi in To has its incoming
// value for From replaced by poison, and To is pushed onto Worklist.
// NOTE(review): listing lines 4261-4262 (the signature) are absent from this
// copy; restore them from upstream before relying on this text.
4263 if (!DeadEdges.insert({From, To}).second)
4264 return;
4265
4266 // Replace phi node operands in successor with poison.
4267 for (PHINode &PN : To->phis())
4268 for (Use &U : PN.incoming_values())
4269 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
4270 replaceUse(U, PoisonValue::get(PN.getType()));
4271 addToWorklist(&PN);
4272 MadeIRChange = true;
4273 }
4274
4275 Worklist.push_back(To);
4276 }
4277
4278 // Under the assumption that I is unreachable, remove it and following
4279 // instructions. Changes are reported directly to MadeIRChange.
// NOTE(review): listing lines 4280-4281, 4294, 4298-4299 and 4302 are absent
// from this copy (the signature, the statement erasing Inst, the definition
// of Changed with its enclosing condition, and the body of the loop over
// Changed); restore them from upstream before relying on this text.
4282 BasicBlock *BB = I->getParent();
// Walk backwards from the instruction just before the terminator down to
// and including I.
4283 for (Instruction &Inst : make_early_inc_range(
4284 make_range(std::next(BB->getTerminator()->getReverseIterator()),
4285 std::next(I->getReverseIterator())))) {
4286 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
4287 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
4288 MadeIRChange = true;
4289 }
// EH pads and token-typed instructions are left in place.
4290 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
4291 continue;
4292 // RemoveDIs: erase debug-info on this instruction manually.
4293 Inst.dropDbgRecords();
4295 MadeIRChange = true;
4296 }
4297
4300 MadeIRChange = true;
4301 for (Value *V : Changed)
4303 }
4304
4305 // Handle potentially dead successors.
4306 for (BasicBlock *Succ : successors(BB))
4307 addDeadEdge(BB, Succ, Worklist);
4308 }
4309
// Drains Worklist. A popped block BB is processed further only when each of
// its predecessor edges is either recorded in DeadEdges or comes from a
// block that BB itself dominates; otherwise the block is skipped.
// NOTE(review): listing lines 4310-4311 (the signature) and 4319 (the
// statement applied to an accepted block) are absent from this copy; restore
// them from upstream before relying on this text.
4312 while (!Worklist.empty()) {
4313 BasicBlock *BB = Worklist.pop_back_val();
4314 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
4315 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
4316 }))
4317 continue;
4318
4320 }
4321 }
4322
// Marks every outgoing edge of BB as dead except the one(s) leading to
// LiveSucc. Passing nullptr for LiveSucc marks all outgoing edges as dead.
// NOTE(review): listing lines 4323, 4325 and 4334 are absent from this copy
// (the opening of the signature, the declaration of Worklist and the
// statement that consumes it); restore them from upstream before relying on
// this text.
4324 BasicBlock *LiveSucc) {
4326 for (BasicBlock *Succ : successors(BB)) {
4327 // The live successor isn't dead.
4328 if (Succ == LiveSucc)
4329 continue;
4330
4331 addDeadEdge(BB, Succ, Worklist);
4332 }
4333
4335 }
4336
// Body of the visitor for the conditional branch BI: canonicalizes the
// condition, marks successors dead for constant/undef conditions, and
// replaces uses of the condition that are dominated by one outgoing edge.
// NOTE(review): listing lines 4337, 4355 and 4360 are absent from this copy
// (the signature, the pattern matched against the select condition, and the
// condition opening the branch-weight block); restore them from upstream
// before relying on this text.
4338 // Change br (not X), label True, label False to: br X, label False, True
4339 Value *Cond = BI.getCondition();
4340 Value *X;
4341 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
4342 // Swap Destinations and condition...
4343 BI.swapSuccessors();
4344 if (BPI)
4345 BPI->swapSuccEdgesProbabilities(BI.getParent());
4346 return replaceOperand(BI, 0, X);
4347 }
4348
4349 // Canonicalize logical-and-with-invert as logical-or-with-invert.
4350 // This is done by inverting the condition and swapping successors:
4351 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
4352 Value *Y;
4353 if (isa<SelectInst>(Cond) &&
4354 match(Cond,
4356 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
4357 Value *Or = Builder.CreateLogicalOr(NotX, Y);
4358
4359 // Set weights for the new OR select instruction too.
4361 if (auto *OrInst = dyn_cast<Instruction>(Or)) {
4362 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
4363 SmallVector<uint32_t> Weights;
4364 if (extractBranchWeights(*CondInst, Weights)) {
4365 assert(Weights.size() == 2 &&
4366 "Unexpected number of branch weights!");
// The condition was inverted, so the two weights trade places.
4367 std::swap(Weights[0], Weights[1]);
4368 setBranchWeights(*OrInst, Weights, /*IsExpected=*/false);
4369 }
4370 }
4371 }
4372 }
4373 BI.swapSuccessors();
4374 if (BPI)
4375 BPI->swapSuccEdgesProbabilities(BI.getParent());
4376 return replaceOperand(BI, 0, Or);
4377 }
4378
4379 // If the condition is irrelevant, remove the use so that other
4380 // transforms on the condition become more effective.
4381 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
4382 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
4383
4384 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
4385 CmpPredicate Pred;
4386 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
4387 !isCanonicalPredicate(Pred)) {
4388 // Swap destinations and condition.
4389 auto *Cmp = cast<CmpInst>(Cond);
4390 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
4391 BI.swapSuccessors();
4392 if (BPI)
4393 BPI->swapSuccEdgesProbabilities(BI.getParent());
4394 Worklist.push(Cmp);
4395 return &BI;
4396 }
4397
// An undef condition leaves no live successor.
4398 if (isa<UndefValue>(Cond)) {
4399 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
4400 return nullptr;
4401 }
// A constant condition keeps exactly one successor live: successor 0 for a
// non-zero (true) condition, successor 1 for zero (false).
4402 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4403 handlePotentiallyDeadSuccessors(BI.getParent(),
4404 BI.getSuccessor(!CI->getZExtValue()));
4405 return nullptr;
4406 }
4407
4408 // Replace all dominated uses of the condition with true/false
4409 // Ignore constant expressions to avoid iterating over uses on other
4410 // functions.
4411 if (!isa<Constant>(Cond) && BI.getSuccessor(0) != BI.getSuccessor(1)) {
4412 for (auto &U : make_early_inc_range(Cond->uses())) {
// Uses dominated by the edge to successor 0 see the condition as true.
4413 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(0));
4414 if (DT.dominates(Edge0, U)) {
4415 replaceUse(U, ConstantInt::getTrue(Cond->getType()));
4416 addToWorklist(cast<Instruction>(U.getUser()));
4417 continue;
4418 }
// Uses dominated by the edge to successor 1 see the condition as false.
4419 BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(1));
4420 if (DT.dominates(Edge1, U)) {
4421 replaceUse(U, ConstantInt::getFalse(Cond->getType()));
4422 addToWorklist(cast<Instruction>(U.getUser()));
4423 }
4424 }
4425 }
4426
4427 DC.registerBranch(&BI);
4428 return nullptr;
4429 }
4430
4431 // Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4432 // we can prove that both (switch C) and (switch X) go to the default when cond
4433 // is false/true.
// IsTrueArm selects which select operand is expected to be the constant C:
// operand 1 (the true arm) when set, operand 2 (the false arm) otherwise.
// Returns X when the replacement is valid and nullptr otherwise.
// NOTE(review): listing lines 4434-4435 and 4455 are absent from this copy
// (the opening of the signature and the definition of the range CR);
// restore them from upstream before relying on this text.
4436 bool IsTrueArm) {
4437 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
4438 auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
4439 if (!C)
4440 return nullptr;
4441
// The constant arm must be routed to the switch's default destination.
4442 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
4443 if (CstBB != SI.getDefaultDest())
4444 return nullptr;
// 3 - CstOpIdx is the index of the other (non-constant) select arm.
4445 Value *X = Select->getOperand(3 - CstOpIdx);
4446 CmpPredicate Pred;
4447 const APInt *RHSC;
4448 if (!match(Select->getCondition(),
4449 m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
4450 return nullptr;
// X is selected when the condition is false if C sits on the true arm, so
// the predicate is inverted in that case.
4451 if (IsTrueArm)
4452 Pred = ICmpInst::getInversePredicate(Pred);
4453
4454 // See whether we can replace the select with X
4456 for (auto Case : SI.cases())
4457 if (!CR.contains(Case.getCaseValue()->getValue()))
4458 return nullptr;
4459
4460 return X;
4461 }
4462
4464 Value *Cond = SI.getCondition();
4465 Value *Op0;
4466 const APInt *CondOpC;
4467 using InvertFn = std::function<APInt(const APInt &Case, const APInt &C)>;
4468
4469 auto MaybeInvertible = [&](Value *Cond) -> InvertFn {
4470 if (match(Cond, m_Add(m_Value(Op0), m_APInt(CondOpC))))
4471 // Change 'switch (X+C) case Case:' into 'switch (X) case Case-C'.
4472 return [](const APInt &Case, const APInt &C) { return Case - C; };
4473
4474 if (match(Cond, m_Sub(m_APInt(CondOpC), m_Value(Op0))))
4475 // Change 'switch (C-X) case Case:' into 'switch (X) case C-Case'.
4476 return [](const APInt &Case, const APInt &C) { return C - Case; };
4477
4478 if (match(Cond, m_Xor(m_Value(Op0), m_APInt(CondOpC))) &&
4479 !CondOpC->isMinSignedValue() && !CondOpC->isMaxSignedValue())
4480 // Change 'switch (X^C) case Case:' into 'switch (X) case Case^C'.
4481 // Prevent creation of large case values by excluding extremes.
4482 return [](const APInt &Case, const APInt &C) { return Case ^ C; };
4483
4484 return nullptr;
4485 };
4486
4487 // Attempt to invert and simplify the switch condition, as long as the
4488 // condition is not used further, as it may not be profitable otherwise.
4489 if (auto InvertFn = MaybeInvertible(Cond); InvertFn && Cond->hasOneUse()) {
4490 for (auto &Case : SI.cases()) {
4491 const APInt &New = InvertFn(Case.getCaseValue()->getValue(), *CondOpC);
4492 Case.setValue(ConstantInt::get(SI.getContext(), New));
4493 }
4494 return replaceOperand(SI, 0, Op0);
4495 }
4496
4497 uint64_t ShiftAmt;
4498 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
4499 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
4500 all_of(SI.cases(), [&](const auto &Case) {
4501 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
4502 })) {
4503 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
4505 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
4506 Shl->hasOneUse()) {
4507 Value *NewCond = Op0;
4508 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
4509 // If the shift may wrap, we need to mask off the shifted bits.
4510 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
4511 NewCond = Builder.CreateAnd(
4512 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
4513 }
4514 for (auto Case : SI.cases()) {
4515 const APInt &CaseVal = Case.getCaseValue()->getValue();
4516 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
4517 : CaseVal.lshr(ShiftAmt);
4518 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
4519 }
4520 return replaceOperand(SI, 0, NewCond);
4521 }
4522 }
4523
4524 // Fold switch(zext/sext(X)) into switch(X) if possible.
4525 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
4526 bool IsZExt = isa<ZExtInst>(Cond);
4527 Type *SrcTy = Op0->getType();
4528 unsigned NewWidth = SrcTy->getScalarSizeInBits();
4529
4530 if (all_of(SI.cases(), [&](const auto &Case) {
4531 const APInt &CaseVal = Case.getCaseValue()->getValue();
4532 return IsZExt ? CaseVal.isIntN(NewWidth)
4533 : CaseVal.isSignedIntN(NewWidth);
4534 })) {
4535 for (auto &Case : SI.cases()) {
4536 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4537 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4538 }
4539 return replaceOperand(SI, 0, Op0);
4540 }
4541 }
4542
4543 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
4544 if (auto *Select = dyn_cast<SelectInst>(Cond)) {
4545 if (Value *V =
4546 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
4547 return replaceOperand(SI, 0, V);
4548 if (Value *V =
4549 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
4550 return replaceOperand(SI, 0, V);
4551 }
4552
4553 KnownBits Known = computeKnownBits(Cond, &SI);
4554 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
4555 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
4556
4557 // Compute the number of leading bits we can ignore.
4558 // TODO: A better way to determine this would use ComputeNumSignBits().
4559 for (const auto &C : SI.cases()) {
4560 LeadingKnownZeros =
4561 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
4562 LeadingKnownOnes =
4563 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
4564 }
4565
4566 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
4567
4568 // Shrink the condition operand if the new type is smaller than the old type.
4569 // But do not shrink to a non-standard type, because backend can't generate
4570 // good code for that yet.
4571 // TODO: We can make it aggressive again after fixing PR39569.
4572 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
4573 shouldChangeType(Known.getBitWidth(), NewWidth)) {
4574 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
4575 Builder.SetInsertPoint(&SI);
4576 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
4577
4578 for (auto Case : SI.cases()) {
4579 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4580 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4581 }
4582 return replaceOperand(SI, 0, NewCond);
4583 }
4584
4585 if (isa<UndefValue>(Cond)) {
4586 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
4587 return nullptr;
4588 }
4589 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4591 SI.findCaseValue(CI)->getCaseSuccessor());
4592 return nullptr;
4593 }
4594
4595 return nullptr;
4596}
4597
/// Simplify an extractvalue \p EV that reads a field of a *.with.overflow
/// intrinsic call (WO below).
///
/// Folds attempted, in order:
///  - index 0 of smul/umul.with.overflow by a constant -1 or a power of two
///    becomes a neg / shl (tried before the one-use test);
///  - if WO has a single use, index 0 becomes the plain binary operator;
///  - if WO has a single use, index 1 (the overflow bit) becomes an icmp or
///    an and when one of the special cases below applies.
///
/// \returns the replacement instruction, or nullptr if no fold applies.
// NOTE(review): this listing jumps over source lines 4598, 4600, 4634 and
// 4658, so the return type, the definition of WO and two further lines are
// not visible here - confirm against the complete file.
4599InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4601 if (!WO)
4602 return nullptr;
4603
4604 Intrinsic::ID OvID = WO->getIntrinsicID();
4605 const APInt *C = nullptr;
// C is also consulted by the constant-RHS overflow check near the end of the
// function, so it stays nullptr when the RHS is not a matching constant.
4606 if (match(WO->getRHS(), m_APIntAllowPoison(C))) {
4607 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
4608 OvID == Intrinsic::umul_with_overflow)) {
4609 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
4610 if (C->isAllOnes())
4611 return BinaryOperator::CreateNeg(WO->getLHS());
4612 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
4613 if (C->isPowerOf2()) {
4614 return BinaryOperator::CreateShl(
4615 WO->getLHS(),
4616 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
4617 }
4618 }
4619 }
4620
4621 // We're extracting from an overflow intrinsic. See if we're the only user.
4622 // That allows us to simplify multiple result intrinsics to simpler things
4623 // that just get one value.
4624 if (!WO->hasOneUse())
4625 return nullptr;
4626
4627 // Check if we're grabbing only the result of a 'with overflow' intrinsic
4628 // and replace it with a traditional binary instruction.
4629 if (*EV.idx_begin() == 0) {
4630 Instruction::BinaryOps BinOp = WO->getBinaryOp();
// Capture the operands before WO's uses are rewritten below.
4631 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
4632 // Replace the old instruction's uses with poison.
4633 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
// NOTE(review): source line 4634 is not part of this listing.
4635 return BinaryOperator::Create(BinOp, LHS, RHS);
4636 }
4637
// From here on EV extracts the overflow bit (index 1).
4638 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
4639
4640 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
4641 if (OvID == Intrinsic::usub_with_overflow)
4642 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
4643
4644 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
4645 // +1 is not possible because we assume signed values.
4646 if (OvID == Intrinsic::smul_with_overflow &&
4647 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
4648 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
4649
4650 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
4651 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
4652 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
4653 // Only handle even bitwidths for performance reasons.
4654 if (BitWidth % 2 == 0)
4655 return new ICmpInst(
4656 ICmpInst::ICMP_UGT, WO->getLHS(),
4657 ConstantInt::get(WO->getLHS()->getType(),
// NOTE(review): source line 4658, which supplies the constant and closes
// this call, is not part of this listing.
4659 }
4660
4661 // If only the overflow result is used, and the right hand side is a
4662 // constant (or constant splat), we can remove the intrinsic by directly
4663 // checking for overflow.
4664 if (C) {
4665 // Compute the no-wrap range for LHS given RHS=C, then construct an
4666 // equivalent icmp, potentially using an offset.
4667 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
4668 WO->getBinaryOp(), *C, WO->getNoWrapKind());
4669
4670 CmpInst::Predicate Pred;
4671 APInt NewRHSC, Offset;
4672 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
4673 auto *OpTy = WO->getRHS()->getType();
4674 auto *NewLHS = WO->getLHS();
4675 if (Offset != 0)
4676 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
// Pred describes the LHS values inside the no-wrap region, whereas the
// extracted bit is set when the operation does wrap - hence the inverse
// predicate.
4677 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
4678 ConstantInt::get(OpTy, NewRHSC));
4679 }
4680
4681 return nullptr;
4682}
4683
// NOTE(review): the first lines of this signature (source lines 4684-4685)
// and source line 4691 are not part of this listing. The body uses EV,
// FrexpCall, SelectInst and Cond; Cond is presumably SelectInst's condition,
// defined on the missing line 4691 - confirm against the complete file.
4686 InstCombiner::BuilderTy &Builder) {
4687 // Helper to fold frexp of select to select of frexp.
// When one arm of SelectInst is a constant matched by m_APFloat, this builds
//   select Cond, <mantissa of the constant>, extractvalue(frexp(other arm), 0)
// (arms in their original positions) at EV and returns that select.
// Returns nullptr if SelectInst or FrexpCall has more than one use, or if
// neither arm matches.
4688
4689 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4690 return nullptr;
4692 Value *TrueVal = SelectInst->getTrueValue();
4693 Value *FalseVal = SelectInst->getFalseValue();
4694
4695 const APFloat *ConstVal = nullptr;
4696 Value *VarOp = nullptr;
4697 bool ConstIsTrue = false;
4698
// Work out which arm is the constant; the other arm (VarOp) still needs a
// frexp call. The true arm is checked first.
4699 if (match(TrueVal, m_APFloat(ConstVal))) {
4700 VarOp = FalseVal;
4701 ConstIsTrue = true;
4702 } else if (match(FalseVal, m_APFloat(ConstVal))) {
4703 VarOp = TrueVal;
4704 ConstIsTrue = false;
4705 } else {
4706 return nullptr;
4707 }
4708
// All new instructions are created at the position of the extractvalue.
4709 Builder.SetInsertPoint(&EV);
4710
// Call the same callee on the non-constant arm only, keeping the IR flags of
// the original call.
4711 CallInst *NewFrexp =
4712 Builder.CreateCall(FrexpCall->getCalledFunction(), {VarOp}, "frexp");
4713 NewFrexp->copyIRFlags(FrexpCall);
4714
4715 Value *NewEV = Builder.CreateExtractValue(NewFrexp, 0, "mantissa");
4716
// Evaluate frexp on the constant arm here; only the mantissa is used and Exp
// is discarded.
4717 int Exp;
4718 APFloat Mantissa = frexp(*ConstVal, Exp, APFloat::rmNearestTiesToEven);
4719
4720 Constant *ConstantMantissa = ConstantFP::get(TrueVal->getType(), Mantissa);
4721
// Rebuild the select with each arm in its original position. SelectInst is
// passed along to CreateSelectFMF, presumably as the fast-math-flag source.
4722 Value *NewSel = Builder.CreateSelectFMF(
4723 Cond, ConstIsTrue ? ConstantMantissa : NewEV,
4724 ConstIsTrue ? NewEV : ConstantMantissa, SelectInst, "select.frexp");
4725 return NewSel;
4726}
// Body of the extractvalue visitor: tries a sequence of folds on the
// extractvalue EV and returns the replacement instruction, or nullptr when
// none applies. Folds attempted, in order: no-index extract, generic
// simplification, frexp-of-select, extract-of-insertvalue, extract from an
// overflow intrinsic, extract from a single-use simple load, extract from a
// phi, extract from a select.
// NOTE(review): the function header (source line 4727, presumably
// InstCombinerImpl::visitExtractValueInst) and source lines 4738 and 4746 are
// not part of this listing - confirm against the complete file.
4728 Value *Agg = EV.getAggregateOperand();
4729
// An extractvalue without indices yields the aggregate itself.
4730 if (!EV.hasIndices())
4731 return replaceInstUsesWith(EV, Agg);
4732
4733 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
4734 SQ.getWithInstruction(&EV)))
4735 return replaceInstUsesWith(EV, V);
4736
4737 Value *Cond, *TrueVal, *FalseVal;
// NOTE(review): source line 4738, which opens the pattern match whose tail
// follows, is not part of this listing. The casts inside the block show that
// Agg is an intrinsic call whose first argument is a select.
4739 m_Value(Cond), m_Value(TrueVal), m_Value(FalseVal)))))) {
4740 auto *SelInst =
4741 cast<SelectInst>(cast<IntrinsicInst>(Agg)->getArgOperand(0));
4742 if (Value *Result =
4743 foldFrexpOfSelect(EV, cast<IntrinsicInst>(Agg), SelInst, Builder))
4744 return replaceInstUsesWith(EV, Result);
4745 }
// NOTE(review): source line 4746 is not part of this listing; it presumably
// opens the block that binds IV to Agg when Agg is an insertvalue.
4747 // We're extracting from an insertvalue instruction, compare the indices
4748 const unsigned *exti, *exte, *insi, *inse;
4749 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
4750 exte = EV.idx_end(), inse = IV->idx_end();
4751 exti != exte && insi != inse;
4752 ++exti, ++insi) {
4753 if (*insi != *exti)
4754 // The insert and extract both reference distinctly different elements.
4755 // This means the extract is not influenced by the insert, and we can
4756 // replace the aggregate operand of the extract with the aggregate
4757 // operand of the insert. i.e., replace
4758 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4759 // %E = extractvalue { i32, { i32 } } %I, 0
4760 // with
4761 // %E = extractvalue { i32, { i32 } } %A, 0
4762 return ExtractValueInst::Create(IV->getAggregateOperand(),
4763 EV.getIndices());
4764 }
// The common prefix of both index lists matched; exti/insi now point at the
// first unmatched index of each list (or at its end).
4765 if (exti == exte && insi == inse)
4766 // Both iterators are at the end: Index lists are identical. Replace
4767 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4768 // %C = extractvalue { i32, { i32 } } %B, 1, 0
4769 // with "i32 42"
4770 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
4771 if (exti == exte) {
4772 // The extract list is a prefix of the insert list. i.e. replace
4773 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4774 // %E = extractvalue { i32, { i32 } } %I, 1
4775 // with
4776 // %X = extractvalue { i32, { i32 } } %A, 1
4777 // %E = insertvalue { i32 } %X, i32 42, 0
4778 // by switching the order of the insert and extract (though the
4779 // insertvalue should be left in, since it may have other uses).
4780 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
4781 EV.getIndices());
4782 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
4783 ArrayRef(insi, inse));
4784 }
4785 if (insi == inse)
4786 // The insert list is a prefix of the extract list
4787 // We can simply remove the common indices from the extract and make it
4788 // operate on the inserted value instead of the insertvalue result.
4789 // i.e., replace
4790 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4791 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4792 // with
4793 // %E = extractvalue { i32 } { i32 42 }, 0
4794 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
4795 ArrayRef(exti, exte));
4796 }
4797
4798 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4799 return R;
4800
4801 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
4802 // Bail out if the aggregate contains scalable vector type
4803 if (auto *STy = dyn_cast<StructType>(Agg->getType());
4804 STy && STy->isScalableTy())
4805 return nullptr;
4806
4807 // If the (non-volatile) load only has one use, we can rewrite this to a
4808 // load from a GEP. This reduces the size of the load. If a load is used
4809 // only by extractvalue instructions then this either must have been
4810 // optimized before, or it is a struct with padding, in which case we
4811 // don't want to do the transformation as it loses padding knowledge.
4812 if (L->isSimple() && L->hasOneUse()) {
4813 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4814 SmallVector<Value*, 4> Indices;
4815 // Prefix an i32 0 since we need the first element.
4816 Indices.push_back(Builder.getInt32(0));
4817 for (unsigned Idx : EV.indices())
4818 Indices.push_back(Builder.getInt32(Idx));
4819
4820 // We need to insert these at the location of the old load, not at that of
4821 // the extractvalue.
4822 Builder.SetInsertPoint(L);
4823 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
4824 L->getPointerOperand(), Indices);
4825 Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
4826 // Whatever aliasing information we had for the original load must also
4827 // hold for the smaller load, so propagate the annotations.
4828 NL->setAAMetadata(L->getAAMetadata());
4829 // Returning the load directly will cause the main loop to insert it in
4830 // the wrong spot, so use replaceInstUsesWith().
4831 return replaceInstUsesWith(EV, NL);
4832 }
4833 }
4834
4835 if (auto *PN = dyn_cast<PHINode>(Agg))
4836 if (Instruction *Res = foldOpIntoPhi(EV, PN))
4837 return Res;
4838
4839 // Canonicalize extract (select Cond, TV, FV)
4840 // -> select cond, (extract TV), (extract FV)
4841 if (auto *SI = dyn_cast<SelectInst>(Agg))
4842 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
4843 return R;
4844
4845 // We could simplify extracts from other values. Note that nested extracts may
4846 // already be simplified implicitly by the above: extract (extract (insert) )
4847 // will be translated into extract ( insert ( extract ) ) first and then just
4848 // the value inserted, if appropriate. Similarly for extracts from single-use
4849 // loads: extract (extract (load)) will be translated to extract (load (gep))
4850 // and if again single-use then via load (gep (gep)) to load (gep).
4851 // However, double extracts from e.g. function arguments or return values
4852 // aren't handled yet.
4853 return nullptr;
4854}
4855
4856/// Return 'true' if the given typeinfo will match anything.
///
/// \param Personality classification of the function's EH personality; it
///        selects which rule below applies.
/// \param TypeInfo the typeinfo constant of a catch clause or filter element.
///
/// Every visible branch except the last answers 'false' unconditionally; the
/// last one treats a null pointer typeinfo as the catch-all.
// NOTE(review): all 'case' labels of the switch below (source lines
// 4859-4861, 4865, 4867 and 4871-4880) are not part of this listing, so which
// personality maps to which branch cannot be read off here - confirm against
// the complete file.
4857static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4858 switch (Personality) {
4862 // The GCC C EH and Rust personality only exists to support cleanups, so
4863 // it's not clear what the semantics of catch clauses are.
4864 return false;
4866 return false;
4868 // While __gnat_all_others_value will match any Ada exception, it doesn't
4869 // match foreign exceptions (or didn't, before gcc-4.7).
4870 return false;
// Remaining personalities: a null typeinfo pointer is the catch-all.
4881 return isa<ConstantPointerNull>(TypeInfo);
4882 }
// Reached only if Personality matched none of the cases above.
4883 llvm_unreachable("invalid enum");
4884}
4885
4886static bool shorter_filter(const Value *LHS, const Value *RHS) {
4887 return
4888 cast<ArrayType>(LHS->getType())->getNumElements()
4889 <
4890 cast<ArrayType>(RHS->getType())->getNumElements();
4891}
4892
// Body of the landingpad visitor: simplifies the clause list of landingpad
// LI. It
//   1. drops repeated catch clauses and everything after a catch-all, and
//      uniques/discards filters,
//   2. stably sorts each run of adjacent filters by length,
//   3. removes a filter when an earlier filter is a subset of it,
// and then either builds a replacement landingpad (when any clause changed),
// clears a now-pointless cleanup flag in place, or returns nullptr.
// NOTE(review): the function header (source line 4893, presumably
// InstCombinerImpl::visitLandingPadInst) and source lines 4962, 5067, 5123,
// 5151 and 5178 are not part of this listing - confirm against the complete
// file.
4894 // The logic here should be correct for any real-world personality function.
4895 // However if that turns out not to be true, the offending logic can always
4896 // be conditioned on the personality function, like the catch-all logic is.
4897 EHPersonality Personality =
4898 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
4899
4900 // Simplify the list of clauses, eg by removing repeated catch clauses
4901 // (these are often created by inlining).
4902 bool MakeNewInstruction = false; // If true, recreate using the following:
4903 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4904 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4905
4906 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4907 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4908 bool isLastClause = i + 1 == e;
4909 if (LI.isCatch(i)) {
4910 // A catch clause.
4911 Constant *CatchClause = LI.getClause(i);
4912 Constant *TypeInfo = CatchClause->stripPointerCasts();
4913
4914 // If we already saw this clause, there is no point in having a second
4915 // copy of it.
4916 if (AlreadyCaught.insert(TypeInfo).second) {
4917 // This catch clause was not already seen.
4918 NewClauses.push_back(CatchClause);
4919 } else {
4920 // Repeated catch clause - drop the redundant copy.
4921 MakeNewInstruction = true;
4922 }
4923
4924 // If this is a catch-all then there is no point in keeping any following
4925 // clauses or marking the landingpad as having a cleanup.
4926 if (isCatchAll(Personality, TypeInfo)) {
4927 if (!isLastClause)
4928 MakeNewInstruction = true;
4929 CleanupFlag = false;
4930 break;
4931 }
4932 } else {
4933 // A filter clause. If any of the filter elements were already caught
4934 // then they can be dropped from the filter. It is tempting to try to
4935 // exploit the filter further by saying that any typeinfo that does not
4936 // occur in the filter can't be caught later (and thus can be dropped).
4937 // However this would be wrong, since typeinfos can match without being
4938 // equal (for example if one represents a C++ class, and the other some
4939 // class derived from it).
// NOTE(review): despite the first sentence above, the visible code never
// consults AlreadyCaught while rebuilding a filter; the comment inside the
// element loop below explains why caught typeinfos are kept.
4940 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4941 Constant *FilterClause = LI.getClause(i);
4942 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
4943 unsigned NumTypeInfos = FilterType->getNumElements();
4944
4945 // An empty filter catches everything, so there is no point in keeping any
4946 // following clauses or marking the landingpad as having a cleanup. By
4947 // dealing with this case here the following code is made a bit simpler.
4948 if (!NumTypeInfos) {
4949 NewClauses.push_back(FilterClause);
4950 if (!isLastClause)
4951 MakeNewInstruction = true;
4952 CleanupFlag = false;
4953 break;
4954 }
4955
4956 bool MakeNewFilter = false; // If true, make a new filter.
4957 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4958 if (isa<ConstantAggregateZero>(FilterClause)) {
4959 // Not an empty filter - it contains at least one null typeinfo.
4960 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4961 Constant *TypeInfo =
// NOTE(review): source line 4962 (the initializer of TypeInfo) is not part of
// this listing.
4963 // If this typeinfo is a catch-all then the filter can never match.
4964 if (isCatchAll(Personality, TypeInfo)) {
4965 // Throw the filter away.
4966 MakeNewInstruction = true;
4967 continue;
4968 }
4969
4970 // There is no point in having multiple copies of this typeinfo, so
4971 // discard all but the first copy if there is more than one.
4972 NewFilterElts.push_back(TypeInfo);
4973 if (NumTypeInfos > 1)
4974 MakeNewFilter = true;
4975 } else {
4976 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
4977 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4978 NewFilterElts.reserve(NumTypeInfos);
4979
4980 // Remove any filter elements that already occurred in the filter
4981 // (elements that were merely caught earlier are kept, see below). While
4982 // there, see if any of the elements are catch-alls. If so, the filter can be discarded.
4983 bool SawCatchAll = false;
4984 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4985 Constant *Elt = Filter->getOperand(j);
4986 Constant *TypeInfo = Elt->stripPointerCasts();
4987 if (isCatchAll(Personality, TypeInfo)) {
4988 // This element is a catch-all. Bail out, noting this fact.
4989 SawCatchAll = true;
4990 break;
4991 }
4992
4993 // Even if we've seen a type in a catch clause, we don't want to
4994 // remove it from the filter. An unexpected type handler may be
4995 // set up for a call site which throws an exception of the same
4996 // type caught. In order for the exception thrown by the unexpected
4997 // handler to propagate correctly, the filter must be correctly
4998 // described for the call site.
4999 //
5000 // Example:
5001 //
5002 // void unexpected() { throw 1;}
5003 // void foo() throw (int) {
5004 // std::set_unexpected(unexpected);
5005 // try {
5006 // throw 2.0;
5007 // } catch (int i) {}
5008 // }
5009
5010 // There is no point in having multiple copies of the same typeinfo in
5011 // a filter, so only add it if we didn't already.
5012 if (SeenInFilter.insert(TypeInfo).second)
5013 NewFilterElts.push_back(cast<Constant>(Elt));
5014 }
5015 // A filter containing a catch-all cannot match anything by definition.
5016 if (SawCatchAll) {
5017 // Throw the filter away.
5018 MakeNewInstruction = true;
5019 continue;
5020 }
5021
5022 // If we dropped something from the filter, make a new one.
5023 if (NewFilterElts.size() < NumTypeInfos)
5024 MakeNewFilter = true;
5025 }
5026 if (MakeNewFilter) {
5027 FilterType = ArrayType::get(FilterType->getElementType(),
5028 NewFilterElts.size());
5029 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
5030 MakeNewInstruction = true;
5031 }
5032
5033 NewClauses.push_back(FilterClause);
5034
5035 // If the new filter is empty then it will catch everything so there is
5036 // no point in keeping any following clauses or marking the landingpad
5037 // as having a cleanup. The case of the original filter being empty was
5038 // already handled above.
5039 if (MakeNewFilter && !NewFilterElts.size()) {
5040 assert(MakeNewInstruction && "New filter but not a new instruction!");
5041 CleanupFlag = false;
5042 break;
5043 }
5044 }
5045 }
5046
5047 // If several filters occur in a row then reorder them so that the shortest
5048 // filters come first (those with the smallest number of elements). This is
5049 // advantageous because shorter filters are more likely to match, speeding up
5050 // unwinding, but mostly because it increases the effectiveness of the other
5051 // filter optimizations below.
5052 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
5053 unsigned j;
5054 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
// Filters are recognised by their array type; catch clauses are not arrays.
5055 for (j = i; j != e; ++j)
5056 if (!isa<ArrayType>(NewClauses[j]->getType()))
5057 break;
5058
5059 // Check whether the filters are already sorted by length. We need to know
5060 // if sorting them is actually going to do anything so that we only make a
5061 // new landingpad instruction if it does.
5062 for (unsigned k = i; k + 1 < j; ++k)
5063 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
5064 // Not sorted, so sort the filters now. Doing an unstable sort would be
5065 // correct too but reordering filters pointlessly might confuse users.
5066 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
// NOTE(review): source line 5067 (the comparator argument that closes this
// call) is not part of this listing.
5068 MakeNewInstruction = true;
5069 break;
5070 }
5071
5072 // Look for the next batch of filters.
// NewClauses[j], if it exists, is not a filter, so the search resumes
// after it.
5073 i = j + 1;
5074 }
5075
5076 // If typeinfos matched if and only if equal, then the elements of a filter L
5077 // that occurs later than a filter F could be replaced by the intersection of
5078 // the elements of F and L. In reality two typeinfos can match without being
5079 // equal (for example if one represents a C++ class, and the other some class
5080 // derived from it) so it would be wrong to perform this transform in general.
5081 // However the transform is correct and useful if F is a subset of L. In that
5082 // case L can be replaced by F, and thus removed altogether since repeating a
5083 // filter is pointless. So here we look at all pairs of filters F and L where
5084 // L follows F in the list of clauses, and remove L if every element of F is
5085 // an element of L. This can occur when inlining C++ functions with exception
5086 // specifications.
5087 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
5088 // Examine each filter in turn.
5089 Value *Filter = NewClauses[i];
5090 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
5091 if (!FTy)
5092 // Not a filter - skip it.
5093 continue;
5094 unsigned FElts = FTy->getNumElements();
5095 // Examine each filter following this one. Doing this backwards means that
5096 // we don't have to worry about filters disappearing under us when removed.
5097 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
5098 Value *LFilter = NewClauses[j];
5099 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
5100 if (!LTy)
5101 // Not a filter - skip it.
5102 continue;
5103 // If Filter is a subset of LFilter, i.e. every element of Filter is also
5104 // an element of LFilter, then discard LFilter.
5105 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
5106 // If Filter is empty then it is a subset of LFilter.
5107 if (!FElts) {
5108 // Discard LFilter.
5109 NewClauses.erase(J);
5110 MakeNewInstruction = true;
5111 // Move on to the next filter.
5112 continue;
5113 }
5114 unsigned LElts = LTy->getNumElements();
5115 // If Filter is longer than LFilter then it cannot be a subset of it.
5116 if (FElts > LElts)
5117 // Move on to the next filter.
5118 continue;
5119 // At this point we know that LFilter has at least one element.
5120 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
5121 // Filter is a subset of LFilter iff Filter contains only zeros (as we
5122 // already know that Filter is not longer than LFilter).
// NOTE(review): source line 5123 is not part of this listing; it presumably
// opens the conditional block that the '}' after the erase below closes.
5124 assert(FElts <= LElts && "Should have handled this case earlier!");
5125 // Discard LFilter.
5126 NewClauses.erase(J);
5127 MakeNewInstruction = true;
5128 }
5129 // Move on to the next filter.
5130 continue;
5131 }
5132 ConstantArray *LArray = cast<ConstantArray>(LFilter);
5133 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
5134 // Since Filter is non-empty and contains only zeros, it is a subset of
5135 // LFilter iff LFilter contains a zero.
5136 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
5137 for (unsigned l = 0; l != LElts; ++l)
5138 if (isa<ConstantPointerNull>(LArray->getOperand(l))) {
5139 // LFilter contains a zero - discard it.
5140 NewClauses.erase(J);
5141 MakeNewInstruction = true;
5142 break;
5143 }
5144 // Move on to the next filter.
5145 continue;
5146 }
5147 // At this point we know that both filters are ConstantArrays. Loop over
5148 // operands to see whether every element of Filter is also an element of
5149 // LFilter. Since filters tend to be short this is probably faster than
5150 // using a method that scales nicely.
// NOTE(review): source line 5151, which presumably defines FArray, is not
// part of this listing.
5152 bool AllFound = true;
5153 for (unsigned f = 0; f != FElts; ++f) {
5154 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
// Assume this element is missing from LFilter until the inner loop finds it.
5155 AllFound = false;
5156 for (unsigned l = 0; l != LElts; ++l) {
5157 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
5158 if (LTypeInfo == FTypeInfo) {
5159 AllFound = true;
5160 break;
5161 }
5162 }
5163 if (!AllFound)
5164 break;
5165 }
5166 if (AllFound) {
5167 // Discard LFilter.
5168 NewClauses.erase(J);
5169 MakeNewInstruction = true;
5170 }
5171 // Move on to the next filter.
5172 }
5173 }
5174
5175 // If we changed any of the clauses, replace the old landingpad instruction
5176 // with a new one.
5177 if (MakeNewInstruction) {
// NOTE(review): source line 5178, which starts the statement creating NLI,
// is not part of this listing.
5179 NewClauses.size());
5180 for (Constant *C : NewClauses)
5181 NLI->addClause(C);
5182 // A landing pad with no clauses must have the cleanup flag set. It is
5183 // theoretically possible, though highly unlikely, that we eliminated all
5184 // clauses. If so, force the cleanup flag to true.
5185 if (NewClauses.empty())
5186 CleanupFlag = true;
5187 NLI->setCleanup(CleanupFlag);
5188 return NLI;
5189 }
5190
5191 // Even if none of the clauses changed, we may nonetheless have understood
5192 // that the cleanup flag is pointless. Clear it if so.
5193 if (LI.isCleanup() != CleanupFlag) {
5194 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
5195 LI.setCleanup(CleanupFlag);
5196 return &LI;
5197 }
5198
5199 return nullptr;
5200}
5201
// Pushes the freeze OrigFI through its operand instruction onto the single
// operand that may be poison. Returns the (updated) operand of OrigFI when
// the transform was done, nullptr otherwise.
// NOTE(review): the line carrying the function name and parameter list
// (source line 5203) and source lines 5231 and 5240 are not part of this
// listing - confirm against the complete file.
5202Value *
5204 // Try to push freeze through instructions that propagate but don't produce
5205 // poison as far as possible. If an operand of freeze follows three
5206 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
5207 // guaranteed-non-poison operands then push the freeze through to the one
5208 // operand that is not guaranteed non-poison. The actual transform is as
5209 // follows.
5210 // Op1 = ... ; Op1 can be poison
5211 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and all of its
5212 // ; other operands are guaranteed non-poison
5213 // ... = Freeze(Op0)
5214 // =>
5215 // Op1 = ...
5216 // Op1.fr = Freeze(Op1)
5217 // ... = Inst(Op1.fr, NonPoisonOps...)
5218 auto *OrigOp = OrigFI.getOperand(0);
5219 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
5220
5221 // While we could change the other users of OrigOp to use freeze(OrigOp), that
5222 // potentially reduces their optimization potential, so let's only do this iff
5223 // the OrigOp is only used by the freeze.
5224 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
5225 return nullptr;
5226
5227 // We can't push the freeze through an instruction which can itself create
5228 // poison. If the only source of new poison is flags, we can simply
5229 // strip them (since we know the only use is the freeze and nothing can
5230 // benefit from them.)
// NOTE(review): source line 5231 (the start of the condition whose last
// argument follows) is not part of this listing.
5232 /*ConsiderFlagsAndMetadata*/ false))
5233 return nullptr;
5234
5235 // If operand is guaranteed not to be poison, there is no need to add freeze
5236 // to the operand. So we first find the single operand that may be poison;
5237 // if there is more than one such operand we give up.
5238 Value *MaybePoisonOperand = nullptr;
5239 for (Value *V : OrigOpInst->operands()) {
// NOTE(review): source line 5240 (the start of the skip condition) is not
// part of this listing.
5241 // Treat identical operands as a single operand.
5242 (MaybePoisonOperand && MaybePoisonOperand == V))
5243 continue;
5244 if (!MaybePoisonOperand)
5245 MaybePoisonOperand = V;
5246 else
5247 return nullptr;
5248 }
5249
// The transform is committed from here on: strip the poison-generating
// annotations from the operand instruction.
5250 OrigOpInst->dropPoisonGeneratingAnnotations();
5251
5252 // If all operands are guaranteed to be non-poison, we can drop freeze.
5253 if (!MaybePoisonOperand)
5254 return OrigOp;
5255
// Create the new freeze immediately before the operand instruction.
5256 Builder.SetInsertPoint(OrigOpInst);
5257 Value *FrozenMaybePoisonOperand = Builder.CreateFreeze(
5258 MaybePoisonOperand, MaybePoisonOperand->getName() + ".fr");
5259
5260 OrigOpInst->replaceUsesOfWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
5261 return OrigOp;
5262}
5263
// Folds freeze(PN) when PN is a recurrence phi: the freeze is moved onto the
// phi's single start value and poison-generating annotations are dropped
// along the backedge chain. Returns the result of replacing FI's uses with
// PN, or nullptr if the pattern does not apply.
// NOTE(review): the first line of the signature (source line 5264) and
// source lines 5272, 5297, 5298, 5311 and 5312 are not part of this listing,
// so the declarations of Worklist, Visited, DropFlags and I are not visible -
// confirm against the complete file.
5265 PHINode *PN) {
5266 // Detect whether this is a recurrence with a start value and some number of
5267 // backedge values. We'll check whether we can push the freeze through the
5268 // backedge values (possibly dropping poison flags along the way) until we
5269 // reach the phi again. In that case, we can move the freeze to the start
5270 // value.
5271 Use *StartU = nullptr;
// An incoming value is treated as a backedge value when the phi's block
// dominates the incoming block; any other incoming value is a start value.
5273 for (Use &U : PN->incoming_values()) {
5274 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
5275 // Add backedge value to worklist.
5276 Worklist.push_back(U.get());
5277 continue;
5278 }
5279
5280 // Don't bother handling multiple start values.
5281 if (StartU)
5282 return nullptr;
5283 StartU = &U;
5284 }
5285
5286 if (!StartU || Worklist.empty())
5287 return nullptr; // Not a recurrence.
5288
5289 Value *StartV = StartU->get();
5290 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
5291 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
5292 // We can't insert freeze if the start value is the result of the
5293 // terminator (e.g. an invoke).
5294 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5295 return nullptr;
5296
// Walk backwards from the backedge values through their operands. Nothing is
// modified until the whole walk has succeeded.
5299 while (!Worklist.empty()) {
5300 Value *V = Worklist.pop_back_val();
5301 if (!Visited.insert(V).second)
5302 continue;
5303
5304 if (Visited.size() > 32)
5305 return nullptr; // Limit the total number of values we inspect.
5306
5307 // Assume that PN is non-poison, because it will be after the transform.
5308 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5309 continue;
5310
// NOTE(review): source lines 5311-5312 (the definition of I and the start of
// the bail-out condition) are not part of this listing.
5313 /*ConsiderFlagsAndMetadata*/ false))
5314 return nullptr;
5315
5316 DropFlags.push_back(I);
5317 append_range(Worklist, I->operands());
5318 }
5319
// The walk succeeded: commit by stripping the annotations from every
// instruction collected in DropFlags.
5320 for (Instruction *I : DropFlags)
5321 I->dropPoisonGeneratingAnnotations();
5322
5323 if (StartNeedsFreeze) {
// Freeze the start value at the end of its incoming block, just before the
// terminator.
5324 Builder.SetInsertPoint(StartBB->getTerminator());
5325 Value *FrozenStartV = Builder.CreateFreeze(StartV,
5326 StartV->getName() + ".fr");
5327 replaceUse(*StartU, FrozenStartV);
5328 }
5329 return replaceInstUsesWith(FI, PN);
5330}
5331
// Moves the freeze FI to directly after the definition of its operand and
// replaces every use of that operand which FI then dominates with FI.
// Returns true if FI was moved or any use was replaced. Nothing is done when
// the operand is a constant or has only one use.
// NOTE(review): the function header (source line 5332, presumably
// InstCombinerImpl::freezeOtherUses) and source lines 5346 and 5363 are not
// part of this listing, so the insertion point chosen for an Argument and
// the declaration of Users are not visible - confirm against the complete
// file.
5333 Value *Op = FI.getOperand(0);
5334
5335 if (isa<Constant>(Op) || Op->hasOneUse())
5336 return false;
5337
5338 // Move the freeze directly after the definition of its operand, so that
5339 // it dominates the maximum number of uses. Note that it may not dominate
5340 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5341 // the normal/default destination. This is why the domination check in the
5342 // replacement below is still necessary.
5343 BasicBlock::iterator MoveBefore;
5344 if (isa<Argument>(Op)) {
5345 MoveBefore =
// NOTE(review): source line 5346 (the value assigned to MoveBefore) is not
// part of this listing.
5347 } else {
5348 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
// No insertion point is available after this definition; give up.
5349 if (!MoveBeforeOpt)
5350 return false;
5351 MoveBefore = *MoveBeforeOpt;
5352 }
5353
5354 // Re-point iterator to come after any debug-info records.
5355 MoveBefore.setHeadBit(false);
5356
5357 bool Changed = false;
// Skip the move when FI already sits at the chosen position.
5358 if (&FI != &*MoveBefore) {
5359 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
5360 Changed = true;
5361 }
5362
// NOTE(review): source line 5363, which presumably declares Users, is not
// part of this listing.
// Rewrite only the uses that FI dominates, recording each rewritten user.
5364 Changed |= Op->replaceUsesWithIf(&FI, [&](Use &U) -> bool {
5365 if (!DT.dominates(&FI, U))
5366 return false;
5367
5368 Users.push_back(U.getUser());
5369 return true;
5370 });
5371
// Refresh the assumption cache's affected values for assumptions associated
// with the rewritten users.
5372 for (auto *U : Users) {
5373 for (auto &AssumeVH : AC.assumptionsFor(U)) {
5374 if (!AssumeVH)
5375 continue;
5376 AC.updateAffectedValues(cast<AssumeInst>(AssumeVH));
5377 }
5378 }
5379
5380 return Changed;
5381}
5382
5383// Check if any direct or bitcast user of this value is a shuffle instruction.
// The search recurses through users that are bitcasts of V; it returns false
// when no such user is found.
// NOTE(review): the function header (source line 5384) and the condition on
// source line 5386 are not part of this listing; per the comment above, that
// condition presumably tests whether U is a shuffle - confirm against the
// complete file.
5385 for (auto *U : V->users()) {
5387 return true;
5388 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
5389 return true;
5390 }
5391 return false;
5392}
5393
// NOTE(review): original line 5394 (the function signature) is missing from
// this listing. Judging by the body this is the visitor for freeze
// instructions, with the freeze passed as `I` -- confirm against the upstream
// file. It returns a replacement instruction, `&I` when I was changed in
// place, or nullptr when nothing was done.
5395 Value *Op0 = I.getOperand(0);
5396
5397 if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
5398 return replaceInstUsesWith(I, V);
5399
5400 // freeze (phi const, x) --> phi const, (freeze x)
5401 if (auto *PN = dyn_cast<PHINode>(Op0)) {
5402 if (Instruction *NV = foldOpIntoPhi(I, PN))
5403 return NV;
5404 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
5405 return NV;
5406 }
5407
     // NOTE(review): line 5408 (the `if` that defines NI) is missing from the
     // listing.
5409 return replaceInstUsesWith(I, NI);
5410
5411 // If I is freeze(undef), check its uses and fold it to a fixed constant.
5412 // - or: pick -1
5413 // - select's condition: if the true value is constant, choose it by making
5414 // the condition true.
5415 // - phi: pick the common constant across operands
5416 // - default: pick 0
5417 //
5418 // Note that this transform is intentionally done here rather than
5419 // via an analysis in InstSimplify or at individual user sites. That is
5420 // because we must produce the same value for all uses of the freeze -
5421 // it's the reason "freeze" exists!
5422 //
5423 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
5424 // duplicating logic for binops at least.
5425 auto getUndefReplacement = [&](Type *Ty) {
5426 auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
5427 // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
5428 // removed.
5429 Constant *BestValue = nullptr;
5430 for (Value *V : PN.incoming_values()) {
5431 if (match(V, m_Freeze(m_Undef())))
5432 continue;
5433
     // NOTE(review): line 5434 (the definition of `C`) is missing from the
     // listing.
5435 if (!C)
5436 return nullptr;
5437
     // NOTE(review): line 5438 (the condition guarding the return below) is
     // missing from the listing.
5439 return nullptr;
5440
     // Two different constants among the incoming values: no common choice.
5441 if (BestValue && BestValue != C)
5442 return nullptr;
5443
5444 BestValue = C;
5445 }
5446 return BestValue;
5447 };
5448
5449 Value *NullValue = Constant::getNullValue(Ty);
5450 Value *BestValue = nullptr;
5451 for (auto *U : I.users()) {
5452 Value *V = NullValue;
5453 if (match(U, m_Or(m_Value(), m_Value())))
     // NOTE(review): line 5454 (the assignment to V for the `or` case) is
     // missing from the listing.
5455 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
5456 V = ConstantInt::getTrue(Ty);
     // The freeze is one of the value arms of a select: prefer the other arm,
     // unless that arm is the freeze itself or may be undef or poison, in which
     // case fall back to zero.
5457 else if (match(U, m_c_Select(m_Specific(&I), m_Value(V)))) {
5458 if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, &AC, &I, &DT))
5459 V = NullValue;
5460 } else if (auto *PHI = dyn_cast<PHINode>(U)) {
5461 if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
5462 V = MaybeV;
5463 }
5464
     // All users must agree on the preferred value; on any disagreement settle
     // for zero.
5465 if (!BestValue)
5466 BestValue = V;
5467 else if (BestValue != V)
5468 BestValue = NullValue;
5469 }
5470 assert(BestValue && "Must have at least one use");
5471 assert(BestValue != &I && "Cannot replace with itself");
5472 return BestValue;
5473 };
5474
5475 if (match(Op0, m_Undef())) {
5476 // Don't fold freeze(undef/poison) if it's used as a vector operand in
5477 // a shuffle. This may improve codegen for shuffles that allow
5478 // unspecified inputs.
     // NOTE(review): line 5479 (the condition guarding the return below) is
     // missing from the listing.
5480 return nullptr;
5481 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
5482 }
5483
     // For a fixed-width vector constant, replace its undef elements with the
     // first element that is not undef (zero if there is none). Returns nullptr
     // for any other type.
5484 auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
5485 Type *Ty = C->getType();
5486 auto *VTy = dyn_cast<FixedVectorType>(Ty);
5487 if (!VTy)
5488 return nullptr;
5489 unsigned NumElts = VTy->getNumElements();
5490 Constant *BestValue = Constant::getNullValue(VTy->getScalarType());
5491 for (unsigned i = 0; i != NumElts; ++i) {
5492 Constant *EltC = C->getAggregateElement(i);
5493 if (EltC && !match(EltC, m_Undef())) {
5494 BestValue = EltC;
5495 break;
5496 }
5497 }
5498 return Constant::replaceUndefsWith(C, BestValue);
5499 };
5500
5501 Constant *C;
5502 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement() &&
5503 !C->containsConstantExpression()) {
5504 if (Constant *Repl = getFreezeVectorReplacement(C))
5505 return replaceInstUsesWith(I, Repl);
5506 }
5507
5508 // Replace uses of Op with freeze(Op).
5509 if (freezeOtherUses(I))
5510 return &I;
5511
5512 return nullptr;
5513}
5514
5515/// Check for case where the call writes to an otherwise dead alloca. This
5516/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5517/// helper *only* analyzes the write; doesn't check any other legality aspect.
// NOTE(review): line 5518 (the signature) is missing from this listing. The
// sinking code calls this helper as SoleWriteToDeadLocal(I, TLI).
5519 auto *CB = dyn_cast<CallBase>(I);
5520 if (!CB)
5521 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5522 // to allow reload along used path as described below. Otherwise, this
5523 // is simply a store to a dead allocation which will be removed.
5524 return false;
     // The call must have a known destination memory location ...
5525 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
5526 if (!Dest)
5527 return false;
     // ... whose underlying object is an alloca.
5528 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
5529 if (!AI)
5530 // TODO: allow malloc?
5531 return false;
5532 // TODO: allow memory access dominated by move point? Note that since AI
5533 // could have a reference to itself captured by the call, we would need to
5534 // account for cycles in doing so.
5535 SmallVector<const User *> AllocaUsers;
     // NOTE(review): line 5536 (the declaration of `Visited`) is missing from the
     // listing.
     // Queue each user of the given instruction exactly once.
5537 auto pushUsers = [&](const Instruction &I) {
5538 for (const User *U : I.users()) {
5539 if (Visited.insert(U).second)
5540 AllocaUsers.push_back(U);
5541 }
5542 };
5543 pushUsers(*AI);
     // Walk the transitive users of the alloca: GEPs and address-space casts are
     // looked through, the call itself is accepted, and any other user makes the
     // alloca count as live.
5544 while (!AllocaUsers.empty()) {
5545 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
5546 if (isa<GetElementPtrInst>(UserI) || isa<AddrSpaceCastInst>(UserI)) {
5547 pushUsers(*UserI);
5548 continue;
5549 }
5550 if (UserI == CB)
5551 continue;
5552 // TODO: support lifetime.start/end here
5553 return false;
5554 }
5555 return true;
5556}
5557
5558/// Try to move the specified instruction from its current block into the
5559/// beginning of DestBlock, which can only happen if it's safe to move the
5560/// instruction past all of the instructions between it and the end of its
5561/// block.
// Returns true if the instruction was moved, false if any legality check
// below rejected the move.
// NOTE(review): line 5562 (the first line of the signature) is missing from
// this listing.
5563 BasicBlock *DestBlock) {
5564 BasicBlock *SrcBlock = I->getParent();
5565
5566 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
5567 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
5568 I->isTerminator())
5569 return false;
5570
5571 // Do not sink static or dynamic alloca instructions. Static allocas must
5572 // remain in the entry block, and dynamic allocas must not be sunk in between
5573 // a stacksave / stackrestore pair, which would incorrectly shorten its
5574 // lifetime.
5575 if (isa<AllocaInst>(I))
5576 return false;
5577
5578 // Do not sink into catchswitch blocks.
5579 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
5580 return false;
5581
5582 // Do not sink convergent call instructions.
5583 if (auto *CI = dyn_cast<CallInst>(I)) {
5584 if (CI->isConvergent())
5585 return false;
5586 }
5587
5588 // Unless we can prove that the memory write isn't visible except on the
5589 // path we're sinking to, we must bail.
5590 if (I->mayWriteToMemory()) {
5591 if (!SoleWriteToDeadLocal(I, TLI))
5592 return false;
5593 }
5594
5595 // We can only sink load instructions if there is nothing between the load and
5596 // the end of block that could change the value.
5597 if (I->mayReadFromMemory() &&
5598 !I->hasMetadata(LLVMContext::MD_invariant_load)) {
5599 // We don't want to do any sophisticated alias analysis, so we only check
5600 // the instructions after I in I's parent block if we try to sink to its
5601 // successor block.
5602 if (DestBlock->getUniquePredecessor() != I->getParent())
5603 return false;
5604 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
5605 E = I->getParent()->end();
5606 Scan != E; ++Scan)
5607 if (Scan->mayWriteToMemory())
5608 return false;
5609 }
5610
     // The predicate selects droppable uses whose user is an instruction outside
     // DestBlock; each such user is also queued on the worklist. Note that the
     // lambda-local `I` shadows the instruction being sunk.
5611 I->dropDroppableUses([&](const Use *U) {
5612 auto *I = dyn_cast<Instruction>(U->getUser());
5613 if (I && I->getParent() != DestBlock) {
5614 Worklist.add(I);
5615 return true;
5616 }
5617 return false;
5618 });
5619 /// FIXME: We could remove droppable uses that are not dominated by
5620 /// the new position.
5621
5622 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
5623 I->moveBefore(*DestBlock, InsertPos);
5624 ++NumSunkInst;
5625
5626 // Also sink all related debug uses from the source basic block. Otherwise we
5627 // get debug use before the def. Attempt to salvage debug uses first, to
5628 // maximise the range variables have location for. If we cannot salvage, then
5629 // mark the location undef: we know it was supposed to receive a new location
5630 // here, but that computation has been sunk.
5631 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
5632 findDbgUsers(I, DbgVariableRecords);
5633 if (!DbgVariableRecords.empty())
5634 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
5635 DbgVariableRecords);
5636
5637 // PS: there are numerous flaws with this behaviour, not least that right now
5638 // assignments can be re-ordered past other assignments to the same variable
5639 // if they use different Values. Creating more undef assignments can never be
5640 // undone. And salvaging all users outside of this block can unnecessarily
5641 // alter the lifetime of the live-value that the variable refers to.
5642 // Some of these things can be resolved by tolerating debug use-before-defs in
5643 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
5644 // being used for more architectures.
5645
5646 return true;
5647}
5648
// Sinks the debug records that describe `I` along with it: records outside
// DestBlock are salvaged, and clones of the SrcBlock records are inserted at
// InsertPos in DestBlock.
// NOTE(review): line 5649 (the first line of the signature) is missing from
// this listing; the sinking code above calls this function as
// tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
// DbgVariableRecords).
5650 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
5651 BasicBlock *DestBlock,
5652 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
5653 // For all debug values in the destination block, the sunk instruction
5654 // will still be available, so they do not need to be dropped.
5655
5656 // Fetch all DbgVariableRecords not already in the destination.
5657 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
5658 for (auto &DVR : DbgVariableRecords)
5659 if (DVR->getParent() != DestBlock)
5660 DbgVariableRecordsToSalvage.push_back(DVR);
5661
5662 // Fetch a second collection, of DbgVariableRecords in the source block that
5663 // we're going to sink.
5664 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
5665 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
5666 if (DVR->getParent() == SrcBlock)
5667 DbgVariableRecordsToSink.push_back(DVR);
5668
5669 // Sort DbgVariableRecords according to their position in the block. This is a
5670 // partial order: DbgVariableRecords attached to different instructions will
5671 // be ordered by the instruction order, but DbgVariableRecords attached to the
5672 // same instruction won't have an order.
     // Note that the comparator puts A first when B's instruction comes before
     // A's, i.e. the result is in reverse order of position in the block.
5673 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
5674 return B->getInstruction()->comesBefore(A->getInstruction());
5675 };
5676 llvm::stable_sort(DbgVariableRecordsToSink, Order);
5677
5678 // If there are two assignments to the same variable attached to the same
5679 // instruction, the ordering between the two assignments is important. Scan
5680 // for this (rare) case and establish which is the last assignment.
5681 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
     // NOTE(review): line 5682 (the declaration of `FilterOutMap`) is missing
     // from the listing.
5683 if (DbgVariableRecordsToSink.size() > 1) {
     // NOTE(review): line 5684 (the declaration of `CountMap`) is missing from
     // the listing.
5685 // Count how many assignments to each variable there are per instruction.
5686 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5687 DebugVariable DbgUserVariable =
5688 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5689 DVR->getDebugLoc()->getInlinedAt());
5690 CountMap[std::make_pair(DVR->getInstruction(), DbgUserVariable)] += 1;
5691 }
5692
5693 // If there are any instructions with two assignments, add them to the
5694 // FilterOutMap to record that they need extra filtering.
     // NOTE(review): line 5695 (the declaration of `DupSet`) is missing from the
     // listing.
5696 for (auto It : CountMap) {
5697 if (It.second > 1) {
5698 FilterOutMap[It.first] = nullptr;
5699 DupSet.insert(It.first.first);
5700 }
5701 }
5702
5703 // For all instruction/variable pairs needing extra filtering, find the
5704 // latest assignment.
5705 for (const Instruction *Inst : DupSet) {
     // Records are scanned in reverse, and only the first hit per pair is kept
     // (an entry that is already set is skipped).
5706 for (DbgVariableRecord &DVR :
5707 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
5708 DebugVariable DbgUserVariable =
5709 DebugVariable(DVR.getVariable(), DVR.getExpression(),
5710 DVR.getDebugLoc()->getInlinedAt());
5711 auto FilterIt =
5712 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
5713 if (FilterIt == FilterOutMap.end())
5714 continue;
5715 if (FilterIt->second != nullptr)
5716 continue;
5717 FilterIt->second = &DVR;
5718 }
5719 }
5720 }
5721
5722 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
5723 // out any duplicate assignments identified above.
     // NOTE(review): line 5724 (the declaration of `DVRClones`) is missing from
     // the listing.
5725 SmallSet<DebugVariable, 4> SunkVariables;
5726 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
     // NOTE(review): line 5727 (the condition guarding the `continue` below) is
     // missing from the listing.
5728 continue;
5729
5730 DebugVariable DbgUserVariable =
5731 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5732 DVR->getDebugLoc()->getInlinedAt());
5733
5734 // For any variable where there were multiple assignments in the same place,
5735 // ignore all but the last assignment.
5736 if (!FilterOutMap.empty()) {
5737 InstVarPair IVP = std::make_pair(DVR->getInstruction(), DbgUserVariable);
5738 auto It = FilterOutMap.find(IVP);
5739
5740 // Filter out.
5741 if (It != FilterOutMap.end() && It->second != DVR)
5742 continue;
5743 }
5744
     // Clone at most one record per variable: skip variables already handled.
5745 if (!SunkVariables.insert(DbgUserVariable).second)
5746 continue;
5747
     // dbg.assign records are not cloned.
5748 if (DVR->isDbgAssign())
5749 continue;
5750
5751 DVRClones.emplace_back(DVR->clone());
5752 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
5753 }
5754
5755 // Perform salvaging without the clones, then sink the clones.
     // Note: when nothing was cloned this returns before the salvage call below.
5756 if (DVRClones.empty())
5757 return;
5758
5759 salvageDebugInfoForDbgValues(*I, DbgVariableRecordsToSalvage);
5760
5761 // The clones are in reverse order of original appearance. Assert that the
5762 // head bit is set on the iterator as we _should_ have received it via
5763 // getFirstInsertionPt. Inserting like this will reverse the clone order as
5764 // we'll repeatedly insert at the head, such as:
5765 // DVR-3 (third insertion goes here)
5766 // DVR-2 (second insertion goes here)
5767 // DVR-1 (first insertion goes here)
5768 // Any-Prior-DVRs
5769 // InsertPtInst
5770 assert(InsertPos.getHeadBit());
5771 for (DbgVariableRecord *DVRClone : DVRClones) {
5772 InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
5773 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5774 }
5775}
5776
// Main worklist loop: repeatedly takes an instruction off the worklist, erases
// it if dead, tries to sink it, then visits it, re-queueing whatever changed.
// Returns whether the IR was modified.
// NOTE(review): line 5777 (the function signature) is missing from this
// listing; the driver invokes this as IC.run().
5778 while (!Worklist.isEmpty()) {
5779 // Walk deferred instructions in reverse order, and push them to the
5780 // worklist, which means they'll end up popped from the worklist in-order.
5781 while (Instruction *I = Worklist.popDeferred()) {
5782 // Check to see if we can DCE the instruction. We already do this here to
5783 // reduce the number of uses and thus allow other folds to trigger.
5784 // Note that eraseInstFromFunction() may push additional instructions on
5785 // the deferred worklist, so this will DCE whole instruction chains.
     // NOTE(review): lines 5786-5787 (the dead-instruction check and the erase
     // call that open this block) are missing from the listing.
5788 ++NumDeadInst;
5789 continue;
5790 }
5791
5792 Worklist.push(I);
5793 }
5794
5795 Instruction *I = Worklist.removeOne();
5796 if (I == nullptr) continue; // skip null values.
5797
5798 // Check to see if we can DCE the instruction.
     // NOTE(review): lines 5799-5800 (the dead-instruction check and the erase
     // call that open this block) are missing from the listing.
5801 ++NumDeadInst;
5802 continue;
5803 }
5804
5805 if (!DebugCounter::shouldExecute(VisitCounter))
5806 continue;
5807
5808 // See if we can trivially sink this instruction to its user if we can
5809 // prove that the successor is not executed more frequently than our block.
5810 // Return the UserBlock if successful.
5811 auto getOptionalSinkBlockForInst =
5812 [this](Instruction *I) -> std::optional<BasicBlock *> {
5813 if (!EnableCodeSinking)
5814 return std::nullopt;
5815
5816 BasicBlock *BB = I->getParent();
5817 BasicBlock *UserParent = nullptr;
5818 unsigned NumUsers = 0;
5819
5820 for (Use &U : I->uses()) {
5821 User *User = U.getUser();
5822 if (User->isDroppable()) {
5823 // Do not sink if there are dereferenceable assumes that would be
5824 // removed.
     // NOTE(review): line 5825 (the definition of `II`) is missing from the
     // listing.
5826 if (II->getIntrinsicID() != Intrinsic::assume ||
5827 !II->getOperandBundle("dereferenceable"))
5828 continue;
5829 }
5830
     // Give up on instructions with too many (non-skipped) users.
5831 if (NumUsers > MaxSinkNumUsers)
5832 return std::nullopt;
5833
5834 Instruction *UserInst = cast<Instruction>(User);
5835 // Special handling for Phi nodes - get the block the use occurs in.
5836 BasicBlock *UserBB = UserInst->getParent();
5837 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
5838 UserBB = PN->getIncomingBlock(U);
5839 // Bail out if we have uses in different blocks. We don't do any
5840 // sophisticated analysis (i.e. finding NearestCommonDominator of these
5841 // use blocks).
5842 if (UserParent && UserParent != UserBB)
5843 return std::nullopt;
5844 UserParent = UserBB;
5845
5846 // Make sure these checks are done only once, naturally we do the checks
5847 // the first time we get the userparent, this will save compile time.
5848 if (NumUsers == 0) {
5849 // Try sinking to another block. If that block is unreachable, then do
5850 // not bother. SimplifyCFG should handle it.
5851 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5852 return std::nullopt;
5853
5854 auto *Term = UserParent->getTerminator();
5855 // See if the user is one of our successors that has only one
5856 // predecessor, so that we don't have to split the critical edge.
5857 // Another option where we can sink is a block that ends with a
5858 // terminator that does not pass control to other block (such as
5859 // return or unreachable or resume). In this case:
5860 // - I dominates the User (by SSA form);
5861 // - the User will be executed at most once.
5862 // So sinking I down to User is always profitable or neutral.
5863 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5864 return std::nullopt;
5865
5866 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5867 }
5868
5869 NumUsers++;
5870 }
5871
5872 // No user or only has droppable users.
5873 if (!UserParent)
5874 return std::nullopt;
5875
5876 return UserParent;
5877 };
5878
5879 auto OptBB = getOptionalSinkBlockForInst(I);
5880 if (OptBB) {
5881 auto *UserParent = *OptBB;
5882 // Okay, the CFG is simple enough, try to sink this instruction.
5883 if (tryToSinkInstruction(I, UserParent)) {
5884 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5885 MadeIRChange = true;
5886 // We'll add uses of the sunk instruction below, but since
5887 // sinking can expose opportunities for its *operands* add
5888 // them to the worklist
5889 for (Use &U : I->operands())
5890 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
5891 Worklist.push(OpI);
5892 }
5893 }
5894
5895 // Now that we have an instruction, try combining it to simplify it.
5896 Builder.SetInsertPoint(I);
5897 Builder.CollectMetadataToCopy(
5898 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5899
5900#ifndef NDEBUG
5901 std::string OrigI;
5902#endif
5903 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5904 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5905
5906 if (Instruction *Result = visit(*I)) {
5907 ++NumCombined;
5908 // Should we replace the old instruction with a new one?
5909 if (Result != I) {
5910 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5911 << " New = " << *Result << '\n');
5912
5913 // We copy the old instruction's DebugLoc to the new instruction, unless
5914 // InstCombine already assigned a DebugLoc to it, in which case we
5915 // should trust the more specifically selected DebugLoc.
5916 Result->setDebugLoc(Result->getDebugLoc().orElse(I->getDebugLoc()));
5917 // We also copy annotation metadata to the new instruction.
5918 Result->copyMetadata(*I, LLVMContext::MD_annotation);
5919 // Everything uses the new instruction now.
5920 I->replaceAllUsesWith(Result);
5921
5922 // Move the name to the new instruction first.
5923 Result->takeName(I);
5924
5925 // Insert the new instruction into the basic block...
5926 BasicBlock *InstParent = I->getParent();
5927 BasicBlock::iterator InsertPos = I->getIterator();
5928
5929 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5930 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
5931 // We need to fix up the insertion point.
5932 if (isa<PHINode>(I)) // PHI -> Non-PHI
5933 InsertPos = InstParent->getFirstInsertionPt();
5934 else // Non-PHI -> PHI
5935 InsertPos = InstParent->getFirstNonPHIIt();
5936 }
5937
5938 Result->insertInto(InstParent, InsertPos);
5939
5940 // Push the new instruction and any users onto the worklist.
5941 Worklist.pushUsersToWorkList(*Result);
5942 Worklist.push(Result);
5943
     // NOTE(review): line 5944 is missing from the listing (presumably the
     // removal of the old instruction -- confirm against the upstream file).
5945 } else {
5946 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5947 << " New = " << *I << '\n');
5948
5949 // If the instruction was modified, it's possible that it is now dead.
5950 // if so, remove it.
     // NOTE(review): lines 5951-5952 (the dead check and its action) are missing
     // from the listing.
5953 } else {
5954 Worklist.pushUsersToWorkList(*I);
5955 Worklist.push(I);
5956 }
5957 }
5958 MadeIRChange = true;
5959 }
5960 }
5961
5962 Worklist.zap();
5963 return MadeIRChange;
5964}
5965
5966// Track the scopes used by !alias.scope and !noalias. In a function, a
5967// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5968// by both sets. If not, the declaration of the scope can be safely omitted.
5969// The MDNode of the scope can be omitted as well for the instructions that are
5970// part of this function. We do not do that at this point, as this might become
5971// too time consuming to do.
// NOTE(review): line 5972 (the class header) is missing from this listing; the
// worklist preparation code instantiates this type as AliasScopeTracker.
     // Scope lists, and the individual scopes inside them, seen in !alias.scope
     // and in !noalias metadata respectively.
5973 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5974 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5975
5976public:
     // NOTE(review): line 5977 (the signature of analyse, taking an instruction
     // `I`) is missing from the listing.
     // Records the alias-scope and noalias metadata attached to I.
5978 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5979 if (!I->hasMetadataOtherThanDebugLoc())
5980 return;
5981
     // Insert the scope list itself and, the first time a list is seen, each
     // MDNode operand it contains.
5982 auto Track = [](Metadata *ScopeList, auto &Container) {
5983 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5984 if (!MDScopeList || !Container.insert(MDScopeList).second)
5985 return;
5986 for (const auto &MDOperand : MDScopeList->operands())
5987 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5988 Container.insert(MDScope);
5989 };
5990
5991 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5992 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5993 }
5994
     // NOTE(review): lines 5995-5996 (the signature of isNoAliasScopeDeclDead and
     // the definition of `Decl`) are missing from the listing.
     // Returns false when the instruction is not a scope declaration; otherwise
     // true when the declared scope is absent from at least one of the two sets.
5997 if (!Decl)
5998 return false;
5999
6000 assert(Decl->use_empty() &&
6001 "llvm.experimental.noalias.scope.decl in use ?");
6002 const MDNode *MDSL = Decl->getScopeList();
6003 assert(MDSL->getNumOperands() == 1 &&
6004 "llvm.experimental.noalias.scope should refer to a single scope");
6005 auto &MDOperand = MDSL->getOperand(0);
6006 if (auto *MD = dyn_cast<MDNode>(MDOperand))
6007 return !UsedAliasScopesAndLists.contains(MD) ||
6008 !UsedNoAliasScopesAndLists.contains(MD);
6009
6010 // Not an MDNode ? throw away.
6011 return true;
6012 }
6013};
6014
6015/// Populate the IC worklist from a function, by walking it in reverse
6016/// post-order and adding all reachable code to the worklist.
6017///
6018/// This has a couple of tricks to make the code faster and more powerful. In
6019/// particular, we constant fold and DCE instructions as we go, to avoid adding
6020/// them to the worklist (this significantly speeds up instcombine on code where
6021/// many instructions are dead or constant). Additionally, if we find a branch
6022/// whose condition is a known constant, we only visit the reachable successors.
// Returns whether the IR was modified.
// NOTE(review): line 6023 (the signature) is missing from this listing; the
// driver invokes this as IC.prepareWorklist(F).
6024 bool MadeIRChange = false;
     // NOTE(review): line 6025 (presumably the declaration of `LiveBlocks`, which
     // is used below) is missing from the listing.
6026 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
6027 DenseMap<Constant *, Constant *> FoldedConstants;
6028 AliasScopeTracker SeenAliasScopes;
6029
     // Marks every edge from BB to a successor other than LiveSucc as dead and
     // replaces the matching phi inputs in that successor with poison. Passing
     // nullptr as LiveSucc marks all outgoing edges dead.
6030 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
6031 for (BasicBlock *Succ : successors(BB))
6032 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
6033 for (PHINode &PN : Succ->phis())
6034 for (Use &U : PN.incoming_values())
6035 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
6036 U.set(PoisonValue::get(PN.getType()));
6037 MadeIRChange = true;
6038 }
6039 };
6040
6041 for (BasicBlock *BB : RPOT) {
     // A non-entry block is treated as dead when every incoming edge is either
     // already dead or comes from a block that BB itself dominates.
6042 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
6043 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
6044 })) {
6045 HandleOnlyLiveSuccessor(BB, nullptr);
6046 continue;
6047 }
6048 LiveBlocks.insert(BB);
6049
6050 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
6051 // ConstantProp instruction if trivially constant.
6052 if (!Inst.use_empty() &&
6053 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
6054 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
6055 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
6056 << '\n');
6057 Inst.replaceAllUsesWith(C);
6058 ++NumConstProp;
6059 if (isInstructionTriviallyDead(&Inst, &TLI))
6060 Inst.eraseFromParent();
6061 MadeIRChange = true;
6062 continue;
6063 }
6064
6065 // See if we can constant fold its operands.
6066 for (Use &U : Inst.operands()) {
     // NOTE(review): line 6067 (the condition guarding the `continue` below) is
     // missing from the listing.
6068 continue;
6069
6070 auto *C = cast<Constant>(U);
     // Fold each distinct constant only once; the result is cached per constant.
6071 Constant *&FoldRes = FoldedConstants[C];
6072 if (!FoldRes)
6073 FoldRes = ConstantFoldConstant(C, DL, &TLI);
6074
6075 if (FoldRes != C) {
6076 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
6077 << "\n Old = " << *C
6078 << "\n New = " << *FoldRes << '\n');
6079 U = FoldRes;
6080 MadeIRChange = true;
6081 }
6082 }
6083
6084 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
6085 // these call instructions consumes non-trivial amount of time and
6086 // provides no value for the optimization.
6087 if (!Inst.isDebugOrPseudoInst()) {
6088 InstrsForInstructionWorklist.push_back(&Inst);
6089 SeenAliasScopes.analyse(&Inst);
6090 }
6091 }
6092
6093 // If this is a branch or switch on a constant, mark only the single
6094 // live successor. Otherwise assume all successors are live.
6095 Instruction *TI = BB->getTerminator();
6096 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
6097 if (isa<UndefValue>(BI->getCondition())) {
6098 // Branch on undef is UB.
6099 HandleOnlyLiveSuccessor(BB, nullptr);
6100 continue;
6101 }
6102 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
     // Successor 0 is kept when the condition is true, successor 1 otherwise.
6103 bool CondVal = Cond->getZExtValue();
6104 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
6105 continue;
6106 }
6107 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
6108 if (isa<UndefValue>(SI->getCondition())) {
6109 // Switch on undef is UB.
6110 HandleOnlyLiveSuccessor(BB, nullptr);
6111 continue;
6112 }
6113 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
6114 HandleOnlyLiveSuccessor(BB,
6115 SI->findCaseValue(Cond)->getCaseSuccessor());
6116 continue;
6117 }
6118 }
6119 }
6120
6121 // Remove instructions inside unreachable blocks. This prevents the
6122 // instcombine code from having to deal with some bad special cases, and
6123 // reduces use counts of instructions.
6124 for (BasicBlock &BB : F) {
6125 if (LiveBlocks.count(&BB))
6126 continue;
6127
6128 unsigned NumDeadInstInBB;
6129 NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB);
6130
6131 MadeIRChange |= NumDeadInstInBB != 0;
6132 NumDeadInst += NumDeadInstInBB;
6133 }
6134
6135 // Once we've found all of the instructions to add to instcombine's worklist,
6136 // add them in reverse order. This way instcombine will visit from the top
6137 // of the function down. This jives well with the way that it adds all uses
6138 // of instructions to the worklist after doing a transformation, thus avoiding
6139 // some N^2 behavior in pathological cases.
6140 Worklist.reserve(InstrsForInstructionWorklist.size());
6141 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
6142 // DCE instruction if trivially dead. As we iterate in reverse program
6143 // order here, we will clean up whole chains of dead instructions.
6144 if (isInstructionTriviallyDead(Inst, &TLI) ||
6145 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
6146 ++NumDeadInst;
6147 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
6148 salvageDebugInfo(*Inst);
6149 Inst->eraseFromParent();
6150 MadeIRChange = true;
6151 continue;
6152 }
6153
6154 Worklist.push(Inst);
6155 }
6156
6157 return MadeIRChange;
6158}
6159
// NOTE(review): line 6160 (the function signature) is missing from this
// listing. The body fills `BackEdges` and sets `ComputedBackEdges`, so this is
// presumably the back-edge computation helper -- confirm the exact name against
// the upstream file.
6161 // Collect backedges.
     // Blocks are visited in reverse post-order; an edge whose target has already
     // been visited (including a self-loop) is recorded as a back edge.
6162 SmallVector<bool> Visited(F.getMaxBlockNumber());
6163 for (BasicBlock *BB : RPOT) {
6164 Visited[BB->getNumber()] = true;
6165 for (BasicBlock *Succ : successors(BB))
6166 if (Visited[Succ->getNumber()])
6167 BackEdges.insert({BB, Succ});
6168 }
6169 ComputedBackEdges = true;
6170}
6171
// Driver: runs InstCombine iterations over F until an iteration makes no
// change or the iteration limit is reached, and returns whether the IR changed.
// NOTE(review): lines 6172-6176 (the start of the signature) are missing from
// this listing; both pass entry points call this function as
// combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, BFI,
// BPI, PSI, <options>).
6177 const InstCombineOptions &Opts) {
6178 auto &DL = F.getDataLayout();
     // Fixpoint verification can be switched off per function with the
     // "instcombine-no-verify-fixpoint" attribute.
6179 bool VerifyFixpoint = Opts.VerifyFixpoint &&
6180 !F.hasFnAttribute("instcombine-no-verify-fixpoint");
6181
6182 /// Builder - This is an IRBuilder that automatically inserts new
6183 /// instructions into the worklist when they are created.
     // NOTE(review): line 6184 (the start of the Builder declaration) is missing
     // from the listing.
6185 F.getContext(), TargetFolder(DL),
6186 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
6187 Worklist.add(I);
     // Newly created assumes are also registered with the assumption cache.
6188 if (auto *Assume = dyn_cast<AssumeInst>(I))
6189 AC.registerAssumption(Assume);
6190 }));
6191
     // NOTE(review): line 6192 is missing from the listing (presumably the
     // definition of `RPOT`, which is passed to the combiner below).
6193
6194 // Lower dbg.declare intrinsics otherwise their value may be clobbered
6195 // by instcombiner.
6196 bool MadeIRChange = false;
     // NOTE(review): line 6197 (the condition guarding the assignment below) is
     // missing from the listing.
6198 MadeIRChange = LowerDbgDeclare(F);
6199
6200 // Iterate while there is work to do.
6201 unsigned Iteration = 0;
6202 while (true) {
     // Without fixpoint verification, simply stop at the iteration limit. With
     // verification, one extra iteration runs so a missed fixpoint can be
     // reported below.
6203 if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
6204 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
6205 << " on " << F.getName()
6206 << " reached; stopping without verifying fixpoint\n");
6207 break;
6208 }
6209
6210 ++Iteration;
6211 ++NumWorklistIterations;
6212 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
6213 << F.getName() << "\n");
6214
6215 InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
6216 BPI, PSI, DL, RPOT);
     // NOTE(review): line 6217 is missing from the listing.
6218 bool MadeChangeInThisIteration = IC.prepareWorklist(F);
6219 MadeChangeInThisIteration |= IC.run();
6220 if (!MadeChangeInThisIteration)
6221 break;
6222
6223 MadeIRChange = true;
     // A change made after MaxIterations iterations means no fixpoint was
     // reached within the limit.
6224 if (Iteration > Opts.MaxIterations) {
     // NOTE(review): line 6225 (the call that receives the message below) is
     // missing from the listing.
6226 "Instruction Combining on " + Twine(F.getName()) +
6227 " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
6228 " iterations. " +
6229 "Use 'instcombine<no-verify-fixpoint>' or function attribute "
6230 "'instcombine-no-verify-fixpoint' to suppress this error.");
6231 }
6232 }
6233
     // Record how many iterations this function needed.
6234 if (Iteration == 1)
6235 ++NumOneIteration;
6236 else if (Iteration == 2)
6237 ++NumTwoIterations;
6238 else if (Iteration == 3)
6239 ++NumThreeIterations;
6240 else
6241 ++NumFourOrMoreIterations;
6242
6243 return MadeIRChange;
6244}
6245
6247
// Prints the pass name via the PassInfoMixin base, followed by the pass
// parameters in the form <max-iterations=N;[no-]verify-fixpoint>.
// NOTE(review): line 6248 (the first line of the signature) is missing from
// this listing.
6249 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6250 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6251 OS, MapClassName2PassName);
6252 OS << '<';
6253 OS << "max-iterations=" << Options.MaxIterations << ";";
6254 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6255 OS << '>';
6256}
6257
// Identity token for this pass: its address is what the pass hands to the
// last-run tracking result (LRT.shouldSkip(&ID) / LRT.update(&ID, ...)).
6258char InstCombinePass::ID = 0;
6259
// New pass manager entry point: gathers the analyses, runs the combiner over F
// and reports which analyses are preserved.
// NOTE(review): lines 6260-6261 (the function signature) are missing from this
// listing.
6262 auto &LRT = AM.getResult<LastRunTrackingAnalysis>(F);
6263 // No changes since last InstCombine pass, exit early.
6264 if (LRT.shouldSkip(&ID))
6265 return PreservedAnalyses::all();
6266
6267 auto &AC = AM.getResult<AssumptionAnalysis>(F);
6268 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
6269 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
     // NOTE(review): line 6270 is missing from the listing (presumably the
     // definition of `ORE`, which is passed to the combiner below).
6271 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
6272
6273 auto *AA = &AM.getResult<AAManager>(F);
6274 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
     // The profile summary is only used if it has already been computed; block
     // frequencies are requested only when a profile summary exists.
6275 ProfileSummaryInfo *PSI =
6276 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
6277 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
6278 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
     // NOTE(review): line 6279 is missing from the listing (presumably the
     // definition of `BPI`, which is passed to the combiner below).
6280
6281 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6282 BFI, BPI, PSI, Options)) {
6283 // No changes, all analyses are preserved.
6284 LRT.update(&ID, /*Changed=*/false);
6285 return PreservedAnalyses::all();
6286 }
6287
6288 // Mark all the analyses that instcombine updates as preserved.
     // NOTE(review): lines 6289 and 6291-6292 (the declaration of `PA` and the
     // preserve calls) are missing from the listing.
6290 LRT.update(&ID, /*Changed=*/true);
6293 return PA;
6294}
6295
6311
// Legacy pass manager entry point: gathers the analyses through getAnalysis
// and runs the combiner with default-constructed InstCombineOptions.
// NOTE(review): line 6312 (the function signature) is missing from this
// listing; presumably this is the runOnFunction override of the legacy
// wrapper pass -- confirm against the upstream file.
6313 if (skipFunction(F))
6314 return false;
6315
6316 // Required analyses.
6317 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6318 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6319 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
     // NOTE(review): lines 6320 and 6322 are missing from the listing (presumably
     // the definitions of `TTI` and `ORE`, which are used below).
6321 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6323
6324 // Optional analyses.
6325 ProfileSummaryInfo *PSI =
     // NOTE(review): line 6326 (the initializer of PSI) is missing from the
     // listing.
6327 BlockFrequencyInfo *BFI =
6328 (PSI && PSI->hasProfileSummary()) ?
     // NOTE(review): line 6329 (the value used when a profile summary exists) is
     // missing from the listing.
6330 nullptr;
6331 BranchProbabilityInfo *BPI = nullptr;
6332 if (auto *WrapperPass =
     // NOTE(review): line 6333 (the initializer of WrapperPass) is missing from
     // the listing.
6334 BPI = &WrapperPass->getBPI();
6335
6336 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6337 BFI, BPI, PSI, InstCombineOptions());
6338}
6339
6341
6343
6345 "Combine redundant instructions", false, false)
6356 "Combine redundant instructions", false, false)
6357
6358// Initialization Routines.
6362
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI)
DXIL Resource Access
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
static bool isSigned(unsigned Opcode)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
iv Induction Variable Users
Definition IVUsers.cpp:48
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
This file provides internal interfaces used to implement the InstCombine.
This file provides the primary interface to the instcombine pass.
static Value * simplifySwitchOnSelectUsingRanges(SwitchInst &SI, SelectInst *Select, bool IsTrueArm)
static bool isUsedWithinShuffleVector(Value *V)
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI)
static Constant * constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector, Constant *Splat, bool SplatLHS, const DataLayout &DL)
static bool shorter_filter(const Value *LHS, const Value *RHS)
static Instruction * combineConstantOffsets(GetElementPtrInst &GEP, InstCombinerImpl &IC)
Combine constant offsets separated by variable offsets.
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
static Instruction * foldSpliceBinOp(BinaryOperator &Inst, InstCombiner::BuilderTy &Builder)
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
static bool hasNoSignedWrap(BinaryOperator &I)
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, const InstCombineOptions &Opts)
static Value * simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ)
static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP)
Return true if we should canonicalize the gep to an i8 ptradd.
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns identity value for given opcode, which can be used to factor patterns like (X *...
static Value * foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall, SelectInst *SelectInst, InstCombiner::BuilderTy &Builder)
static std::optional< std::pair< Value *, Value * > > matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS)
static std::optional< ModRefInfo > isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< Instruction * > &Users, const TargetLibraryInfo &TLI, bool KnowInit)
static cl::opt< unsigned > MaxAllocSiteRemovableUsers("instcombine-max-allocsite-removable-users", cl::Hidden, cl::init(2048), cl::desc("Maximum number of users to visit in alloc-site " "removability analysis"))
static Value * foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC)
static Instruction * canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, GEPOperator *Src, InstCombinerImpl &IC)
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
static Value * simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm)
static Value * tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D)
This tries to simplify binary operations by factorizing out common terms (e.
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp)
This function predicates factorization using distributive laws.
static bool hasNoUnsignedWrap(BinaryOperator &I)
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
static cl::opt< unsigned > MaxSinkNumUsers("instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking"))
static Instruction * foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN, IRBuilderBase &Builder)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return 'true' if the given typeinfo will match anything.
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1, GEPOperator &GEP2)
Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y)) transform.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static bool IsSelect(unsigned Opcode, bool CheckOnlyCC=false)
Check if the opcode is a SELECT or SELECT_CC variant.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
unsigned OpIndex
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
bool isNoAliasScopeDeclDead(Instruction *Inst)
void analyse(Instruction *I)
The Input class is used to parse a yaml document into in-memory structs and vectors.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:227
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition APInt.cpp:1942
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1980
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2012
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1993
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:218
size_t size() const
Get the array size.
Definition ArrayRef.h:141
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
uint64_t getNumElements() const
Type * getElementType() const
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI bool isEntryBlock() const
Return true if this is the entry block of the containing function.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
size_t size() const
Definition BasicBlock.h:482
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition InstrTypes.h:409
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
void setAttributes(AttributeList A)
Set the attributes for this call.
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:852
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
ConstantArray - Constant Array Declarations.
Definition Constants.h:584
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:945
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNot(Constant *C)
static LLVM_ABI Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
Constant Vector Declarations.
Definition Constants.h:668
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
const Constant * stripPointerCasts() const
Definition Constant.h:228
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static LLVM_ABI DIExpression * appendOpsToArg(const DIExpression *Expr, ArrayRef< uint64_t > Ops, unsigned ArgNo, bool StackValue=false)
Create a copy of Expr by appending the given list of Ops to each instance of the operand DW_OP_LLVM_a...
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(CounterInfo &Counter)
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool empty() const
Definition DenseMap.h:113
iterator end()
Definition DenseMap.h:85
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
idx_iterator idx_begin() const
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
This class represents a freeze function that returns a random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
FunctionPass(char &pid)
Definition Pass.h:316
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition Pass.cpp:188
const BasicBlock & getEntryBlock() const
Definition Function.h:809
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags all()
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags intersectForReassociate(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep (gep, p, y), x).
bool hasNoUnsignedWrap() const
bool isInBounds() const
GEPNoWrapFlags intersectForOffsetAdd(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep p, x+y).
static GEPNoWrapFlags none()
GEPNoWrapFlags getNoWrapFlags() const
Definition Operator.h:383
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2091
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition IRBuilder.h:544
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
This instruction inserts a struct field or array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI InstCombinePass(InstCombineOptions Opts={})
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * visitCondBrInst(CondBrInst &BI)
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
Instruction * foldBinOpSelectBinOp(BinaryOperator &Op)
In some cases it is beneficial to fold a select into a binary operator.
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
bool prepareWorklist(Function &F)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * foldBinopWithRecurrence(BinaryOperator &BO)
Try to fold binary operators whose operands are simple interleaved recurrences to a single recurrence...
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
bool SimplifyDemandedFPClass(Instruction *I, unsigned Op, FPClassTest DemandedMask, KnownFPClass &Known, const SimplifyQuery &Q, unsigned Depth=0)
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; }...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
Instruction * visitUncondBrInst(UncondBrInst &BI)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void tryToSinkInstructionDbgVariableRecords(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableRecord * > &DPUsers)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
TargetLibraryInfo & TLI
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
static bool isCanonicalPredicate(CmpPredicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
BranchProbabilityInfo * BPI
ReversePostOrderTraversal< BasicBlock * > & RPOT
const DataLayout & DL
DomConditionCache DC
const bool MinimizeSize
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
void addToWorklist(Instruction *I)
LLVM_ABI Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return nonnull value if V is free to invert under the condition of WillInvertAllUses.
SmallDenseSet< std::pair< const BasicBlock *, const BasicBlock * >, 8 > BackEdges
Backedges, used to avoid pushing instructions across backedges in cases where this may result in infi...
LLVM_ABI std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
DominatorTree & DT
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
LLVM_ABI std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
BuilderTy & Builder
LLVM_ABI bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
bool isBackEdge(const BasicBlock *From, const BasicBlock *To)
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
void visit(Iterator Start, Iterator End)
Definition InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition InstCombine.h:68
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void add(Instruction *I)
Add instruction to the worklist.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that InsertPos ...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
bool isTerminator() const
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
bool isIntDivRem() const
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
bool isCleanup() const
Return 'true' if this landingpad instruction is a cleanup.
unsigned getNumClauses() const
Get the number of clauses for this landing pad.
static LLVM_ABI LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
LLVM_ABI void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
bool isCatch(unsigned Idx) const
Return 'true' if the clause at index Idx is a catch clause.
bool isFilter(unsigned Idx) const
Return 'true' if the clause at index Idx is a filter clause.
Constant * getClause(unsigned Idx) const
Get the value of the clause at index Idx.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
A function/module analysis which provides an empty LastRunTrackingInfo.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
This is the common base class for memset/memcpy/memmove.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition Metadata.h:64
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition Constants.h:1671
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition Registry.h:116
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
const Value * getFalseValue() const
const Value * getCondition() const
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
const Value * getTrueValue() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Multiway switch.
TargetFolder - Create constants with target dependent folding.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:326
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
Unconditional Branch instruction.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Use * op_iterator
Definition User.h:254
op_range operands()
Definition User.h:267
op_iterator op_begin()
Definition User.h:259
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:119
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
op_iterator op_end()
Definition User.h:261
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition Value.h:727
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition Value.cpp:162
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:344
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:146
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
bool use_empty() const
Definition Value.h:346
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:895
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
An efficient, type-erasing, non-owning reference to a callable.
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
reverse_self_iterator getReverseIterator()
Definition ilist_node.h:126
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, FCmpInst > m_FCmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
OneOps_match< OpTy, Instruction::Freeze > m_Freeze(const OpTy &Op)
Matches FreezeInst.
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
constantexpr_match m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations (logical or arithmetic).
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
match_combine_or< CastInst_match< OpTy, UIToFPInst >, CastInst_match< OpTy, SIToFPInst > > m_IToFP(const OpTy &Op)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
auto m_Constant()
Match an arbitrary Constant and ignore it.
NNegZExt_match< OpTy > m_NNegZExt(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
Splat_match< T > m_ConstantSplat(const T &SubPattern)
Match a constant splat. TODO: Extend this to non-constant splats.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
SelectLike_match< CondTy, LTy, RTy > m_SelectLike(const CondTy &C, const LTy &TrueC, const RTy &FalseC)
Matches a value that behaves like a boolean-controlled select, i.e.
auto m_MaxOrMin(const LHS &L, const RHS &R)
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::SDiv > m_SDiv(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
m_Intrinsic_Ty< Opnd0 >::Ty m_Ctpop(const Opnd0 &Op0)
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_VectorInsert(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2115
LLVM_ABI void initializeInstructionCombiningPassPass(PassRegistry &)
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
LLVM_ABI unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition Local.cpp:2526
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, GEPNoWrapFlags NW, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
bool succ_empty(const Instruction *I)
Definition CFG.h:153
LLVM_ABI Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
LLVM_ABI FunctionPass * createInstructionCombiningPass()
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1687
auto successors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1652
LLVM_ABI bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
LLVM_ABI bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into p...
Definition Local.cpp:2509
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI bool LowerDbgDeclare(Function &F)
Lowers dbg.declare records into appropriate set of dbg.value records.
Definition Local.cpp:1833
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI void ConvertDebugDeclareToDebugValue(DbgVariableRecord *DVR, StoreInst *SI, DIBuilder &Builder)
Inserts a dbg.value record before a store to an alloca'd value that has an associated dbg....
Definition Local.cpp:1677
LLVM_ABI void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableRecord * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition Local.cpp:2078
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2455
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ ModRef
The access may reference and may modify the value stored in memory.
Definition ModRef.h:36
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
@ NoModRef
The access neither references nor modifies the value stored in memory.
Definition ModRef.h:30
TargetTransformInfo TTI
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2145
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
LLVM_ABI void findDbgUsers(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the debug info records describing a value.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
LLVM_ABI std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:265
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:89
SimplifyQuery getWithInstruction(const Instruction *I) const