// LLVM 23.0.0git — InstructionCombining.cpp
// (This file was recovered from a rendered doxygen source listing; line-number
// artifacts have been stripped and elided lines are marked where known.)
//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// InstructionCombining - Combine instructions to form fewer, simple
// instructions. This pass does not modify the CFG. This pass is where
// algebraic simplification happens.
//
// This pass combines things like:
//    %Y = add i32 %X, 1
//    %Z = add i32 %Y, 1
// into:
//    %Z = add i32 %X, 2
//
// This is a simple worklist driven algorithm.
//
// This pass guarantees that the following canonicalizations are performed on
// the program:
//    1. If a binary operator has a constant operand, it is moved to the RHS
//    2. Bitwise operators with constant operands are always grouped so that
//       shifts are performed first, then or's, then and's, then xor's.
//    3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
//    4. All cmp instructions on boolean values are replaced with logical ops
//    5. add X, X is represented as (X*2) => (X << 1)
//    6. Multiplies with a power-of-two constant argument are transformed into
//       shifts.
//   ... etc.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <utility>

// NOTE(review): the rendered listing this file was recovered from elided a
// number of includes present upstream (numbering gaps), including the header
// that must be included *after* DEBUG_TYPE is defined
// ("llvm/Transforms/Utils/InstCombine/InstCombiner.h"); restore them from the
// authoritative source. The duplicate <optional> below matches upstream.
#define DEBUG_TYPE "instcombine"
#include <optional>
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
117STATISTIC(NumWorklistIterations,
118 "Number of instruction combining iterations performed");
119STATISTIC(NumOneIteration, "Number of functions with one iteration");
120STATISTIC(NumTwoIterations, "Number of functions with two iterations");
121STATISTIC(NumThreeIterations, "Number of functions with three iterations");
122STATISTIC(NumFourOrMoreIterations,
123 "Number of functions with four or more iterations");
124
125STATISTIC(NumCombined , "Number of insts combined");
126STATISTIC(NumConstProp, "Number of constant folds");
127STATISTIC(NumDeadInst , "Number of dead inst eliminated");
128STATISTIC(NumSunkInst , "Number of instructions sunk");
129STATISTIC(NumExpand, "Number of expansions");
130STATISTIC(NumFactor , "Number of factorizations");
131STATISTIC(NumReassoc , "Number of reassociations");
132DEBUG_COUNTER(VisitCounter, "instcombine-visit",
133 "Controls which instructions are visited");
134
135static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
136 cl::desc("Enable code sinking"),
137 cl::init(true));
138
140 "instcombine-max-sink-users", cl::init(32),
141 cl::desc("Maximum number of undroppable users for instruction sinking"));
142
144MaxArraySize("instcombine-maxarray-size", cl::init(1024),
145 cl::desc("Maximum array size considered when doing a combine"));
146
namespace llvm {
// NOTE(review): the rendered listing elided a declaration on this line
// (upstream declares an extern cl::opt inside this namespace block); restore
// it from the authoritative source before building.
} // end namespace llvm
150
151// FIXME: Remove this flag when it is no longer necessary to convert
152// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
153// increases variable availability at the cost of accuracy. Variables that
154// cannot be promoted by mem2reg or SROA will be described as living in memory
155// for their entire lifetime. However, passes like DSE and instcombine can
156// delete stores to the alloca, leading to misleading and inaccurate debug
157// information. This flag can be removed when those passes are fixed.
158static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
159 cl::Hidden, cl::init(true));
160
161std::optional<Instruction *>
163 // Handle target specific intrinsics
164 if (II.getCalledFunction()->isTargetIntrinsic()) {
165 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(*this, II);
166 }
167 return std::nullopt;
168}
169
171 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
172 bool &KnownBitsComputed) {
173 // Handle target specific intrinsics
174 if (II.getCalledFunction()->isTargetIntrinsic()) {
175 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
176 *this, II, DemandedMask, Known, KnownBitsComputed);
177 }
178 return std::nullopt;
179}
180
182 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
183 APInt &PoisonElts2, APInt &PoisonElts3,
184 std::function<void(Instruction *, unsigned, APInt, APInt &)>
185 SimplifyAndSetOp) {
186 // Handle target specific intrinsics
187 if (II.getCalledFunction()->isTargetIntrinsic()) {
188 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
189 *this, II, DemandedElts, PoisonElts, PoisonElts2, PoisonElts3,
190 SimplifyAndSetOp);
191 }
192 return std::nullopt;
193}
194
195bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
196 // Approved exception for TTI use: This queries a legality property of the
197 // target, not an profitability heuristic. Ideally this should be part of
198 // DataLayout instead.
199 return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
200}
201
202Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
203 if (!RewriteGEP)
204 return llvm::emitGEPOffset(&Builder, DL, GEP);
205
206 IRBuilderBase::InsertPointGuard Guard(Builder);
207 auto *Inst = dyn_cast<Instruction>(GEP);
208 if (Inst)
209 Builder.SetInsertPoint(Inst);
210
211 Value *Offset = EmitGEPOffset(GEP);
212 // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
213 if (Inst && !GEP->hasAllConstantIndices() &&
214 !GEP->getSourceElementType()->isIntegerTy(8)) {
216 *Inst, Builder.CreateGEP(Builder.getInt8Ty(), GEP->getPointerOperand(),
217 Offset, "", GEP->getNoWrapFlags()));
219 }
220 return Offset;
221}
222
223Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
224 GEPNoWrapFlags NW, Type *IdxTy,
225 bool RewriteGEPs) {
226 auto Add = [&](Value *Sum, Value *Offset) -> Value * {
227 if (Sum)
228 return Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(),
229 NW.isInBounds());
230 else
231 return Offset;
232 };
233
234 Value *Sum = nullptr;
235 Value *OneUseSum = nullptr;
236 Value *OneUseBase = nullptr;
237 GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
238 for (GEPOperator *GEP : reverse(GEPs)) {
239 Value *Offset;
240 {
241 // Expand the offset at the point of the previous GEP to enable rewriting.
242 // However, use the original insertion point for calculating Sum.
243 IRBuilderBase::InsertPointGuard Guard(Builder);
244 auto *Inst = dyn_cast<Instruction>(GEP);
245 if (RewriteGEPs && Inst)
246 Builder.SetInsertPoint(Inst);
247
249 if (Offset->getType() != IdxTy)
250 Offset = Builder.CreateVectorSplat(
251 cast<VectorType>(IdxTy)->getElementCount(), Offset);
252 if (GEP->hasOneUse()) {
253 // Offsets of one-use GEPs will be merged into the next multi-use GEP.
254 OneUseSum = Add(OneUseSum, Offset);
255 OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags());
256 if (!OneUseBase)
257 OneUseBase = GEP->getPointerOperand();
258 continue;
259 }
260
261 if (OneUseSum)
262 Offset = Add(OneUseSum, Offset);
263
264 // Rewrite the GEP to reuse the computed offset. This also includes
265 // offsets from preceding one-use GEPs of matched type.
266 if (RewriteGEPs && Inst &&
267 Offset->getType()->isVectorTy() == GEP->getType()->isVectorTy() &&
268 !(GEP->getSourceElementType()->isIntegerTy(8) &&
269 GEP->getOperand(1) == Offset)) {
271 *Inst,
272 Builder.CreatePtrAdd(
273 OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "",
274 OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags())));
276 }
277 }
278
279 Sum = Add(Sum, Offset);
280 OneUseSum = OneUseBase = nullptr;
281 OneUseFlags = GEPNoWrapFlags::all();
282 }
283 if (OneUseSum)
284 Sum = Add(Sum, OneUseSum);
285 if (!Sum)
286 return Constant::getNullValue(IdxTy);
287 return Sum;
288}
289
290/// Legal integers and common types are considered desirable. This is used to
291/// avoid creating instructions with types that may not be supported well by the
292/// the backend.
293/// NOTE: This treats i8, i16 and i32 specially because they are common
294/// types in frontend languages.
295bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
296 switch (BitWidth) {
297 case 8:
298 case 16:
299 case 32:
300 return true;
301 default:
302 return DL.isLegalInteger(BitWidth);
303 }
304}
305
306/// Return true if it is desirable to convert an integer computation from a
307/// given bit width to a new bit width.
308/// We don't want to convert from a legal or desirable type (like i8) to an
309/// illegal type or from a smaller to a larger illegal type. A width of '1'
310/// is always treated as a desirable type because i1 is a fundamental type in
311/// IR, and there are many specialized optimizations for i1 types.
312/// Common/desirable widths are equally treated as legal to convert to, in
313/// order to open up more combining opportunities.
314bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
315 unsigned ToWidth) const {
316 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
317 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
318
319 // Convert to desirable widths even if they are not legal types.
320 // Only shrink types, to prevent infinite loops.
321 if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
322 return true;
323
324 // If this is a legal or desiable integer from type, and the result would be
325 // an illegal type, don't do the transformation.
326 if ((FromLegal || isDesirableIntType(FromWidth)) && !ToLegal)
327 return false;
328
329 // Otherwise, if both are illegal, do not increase the size of the result. We
330 // do allow things like i160 -> i64, but not i64 -> i160.
331 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
332 return false;
333
334 return true;
335}
336
337/// Return true if it is desirable to convert a computation from 'From' to 'To'.
338/// We don't want to convert from a legal to an illegal type or from a smaller
339/// to a larger illegal type. i1 is always treated as a legal type because it is
340/// a fundamental type in IR, and there are many specialized optimizations for
341/// i1 types.
342bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
343 // TODO: This could be extended to allow vectors. Datalayout changes might be
344 // needed to properly support that.
345 if (!From->isIntegerTy() || !To->isIntegerTy())
346 return false;
347
348 unsigned FromWidth = From->getPrimitiveSizeInBits();
349 unsigned ToWidth = To->getPrimitiveSizeInBits();
350 return shouldChangeType(FromWidth, ToWidth);
351}
352
353// Return true, if No Signed Wrap should be maintained for I.
354// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
355// where both B and C should be ConstantInts, results in a constant that does
356// not overflow. This function only handles the Add/Sub/Mul opcodes. For
357// all other opcodes, the function conservatively returns false.
360 if (!OBO || !OBO->hasNoSignedWrap())
361 return false;
362
363 const APInt *BVal, *CVal;
364 if (!match(B, m_APInt(BVal)) || !match(C, m_APInt(CVal)))
365 return false;
366
367 // We reason about Add/Sub/Mul Only.
368 bool Overflow = false;
369 switch (I.getOpcode()) {
370 case Instruction::Add:
371 (void)BVal->sadd_ov(*CVal, Overflow);
372 break;
373 case Instruction::Sub:
374 (void)BVal->ssub_ov(*CVal, Overflow);
375 break;
376 case Instruction::Mul:
377 (void)BVal->smul_ov(*CVal, Overflow);
378 break;
379 default:
380 // Conservatively return false for other opcodes.
381 return false;
382 }
383 return !Overflow;
384}
385
388 return OBO && OBO->hasNoUnsignedWrap();
389}
390
393 return OBO && OBO->hasNoSignedWrap();
394}
395
396/// Conservatively clears subclassOptionalData after a reassociation or
397/// commutation. We preserve fast-math flags when applicable as they can be
398/// preserved.
401 if (!FPMO) {
402 I.clearSubclassOptionalData();
403 return;
404 }
405
406 FastMathFlags FMF = I.getFastMathFlags();
407 I.clearSubclassOptionalData();
408 I.setFastMathFlags(FMF);
409}
410
411/// Combine constant operands of associative operations either before or after a
412/// cast to eliminate one of the associative operations:
413/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
414/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
416 InstCombinerImpl &IC) {
417 auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
418 if (!Cast || !Cast->hasOneUse())
419 return false;
420
421 // TODO: Enhance logic for other casts and remove this check.
422 auto CastOpcode = Cast->getOpcode();
423 if (CastOpcode != Instruction::ZExt)
424 return false;
425
426 // TODO: Enhance logic for other BinOps and remove this check.
427 if (!BinOp1->isBitwiseLogicOp())
428 return false;
429
430 auto AssocOpcode = BinOp1->getOpcode();
431 auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
432 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
433 return false;
434
435 Constant *C1, *C2;
436 if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
437 !match(BinOp2->getOperand(1), m_Constant(C2)))
438 return false;
439
440 // TODO: This assumes a zext cast.
441 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
442 // to the destination type might lose bits.
443
444 // Fold the constants together in the destination type:
445 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
446 const DataLayout &DL = IC.getDataLayout();
447 Type *DestTy = C1->getType();
448 Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
449 if (!CastC2)
450 return false;
451 Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
452 if (!FoldedC)
453 return false;
454
455 IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
456 IC.replaceOperand(*BinOp1, 1, FoldedC);
458 Cast->dropPoisonGeneratingFlags();
459 return true;
460}
461
462// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
463// inttoptr ( ptrtoint (x) ) --> x
464Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
465 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
466 if (IntToPtr && DL.getTypeSizeInBits(IntToPtr->getDestTy()) ==
467 DL.getTypeSizeInBits(IntToPtr->getSrcTy())) {
468 auto *PtrToInt = dyn_cast<PtrToIntInst>(IntToPtr->getOperand(0));
469 Type *CastTy = IntToPtr->getDestTy();
470 if (PtrToInt &&
471 CastTy->getPointerAddressSpace() ==
472 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
473 DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
474 DL.getTypeSizeInBits(PtrToInt->getDestTy()))
475 return PtrToInt->getOperand(0);
476 }
477 return nullptr;
478}
479
480/// This performs a few simplifications for operators that are associative or
481/// commutative:
482///
483/// Commutative operators:
484///
485/// 1. Order operands such that they are listed from right (least complex) to
486/// left (most complex). This puts constants before unary operators before
487/// binary operators.
488///
489/// Associative operators:
490///
491/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
492/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
493///
494/// Associative and commutative operators:
495///
496/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
497/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
498/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
499/// if C1 and C2 are constants.
501 Instruction::BinaryOps Opcode = I.getOpcode();
502 bool Changed = false;
503
504 do {
505 // Order operands such that they are listed from right (least complex) to
506 // left (most complex). This puts constants before unary operators before
507 // binary operators.
508 if (I.isCommutative() && getComplexity(I.getOperand(0)) <
509 getComplexity(I.getOperand(1)))
510 Changed = !I.swapOperands();
511
512 if (I.isCommutative()) {
513 if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
514 replaceOperand(I, 0, Pair->first);
515 replaceOperand(I, 1, Pair->second);
516 Changed = true;
517 }
518 }
519
520 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
521 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));
522
523 if (I.isAssociative()) {
524 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
525 if (Op0 && Op0->getOpcode() == Opcode) {
526 Value *A = Op0->getOperand(0);
527 Value *B = Op0->getOperand(1);
528 Value *C = I.getOperand(1);
529
530 // Does "B op C" simplify?
531 if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
532 // It simplifies to V. Form "A op V".
533 replaceOperand(I, 0, A);
534 replaceOperand(I, 1, V);
535 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
536 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);
537
538 // Conservatively clear all optional flags since they may not be
539 // preserved by the reassociation. Reset nsw/nuw based on the above
540 // analysis.
542
543 // Note: this is only valid because SimplifyBinOp doesn't look at
544 // the operands to Op0.
545 if (IsNUW)
546 I.setHasNoUnsignedWrap(true);
547
548 if (IsNSW)
549 I.setHasNoSignedWrap(true);
550
551 Changed = true;
552 ++NumReassoc;
553 continue;
554 }
555 }
556
557 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
558 if (Op1 && Op1->getOpcode() == Opcode) {
559 Value *A = I.getOperand(0);
560 Value *B = Op1->getOperand(0);
561 Value *C = Op1->getOperand(1);
562
563 // Does "A op B" simplify?
564 if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
565 // It simplifies to V. Form "V op C".
566 replaceOperand(I, 0, V);
567 replaceOperand(I, 1, C);
568 // Conservatively clear the optional flags, since they may not be
569 // preserved by the reassociation.
571 Changed = true;
572 ++NumReassoc;
573 continue;
574 }
575 }
576 }
577
578 if (I.isAssociative() && I.isCommutative()) {
579 if (simplifyAssocCastAssoc(&I, *this)) {
580 Changed = true;
581 ++NumReassoc;
582 continue;
583 }
584
585 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
586 if (Op0 && Op0->getOpcode() == Opcode) {
587 Value *A = Op0->getOperand(0);
588 Value *B = Op0->getOperand(1);
589 Value *C = I.getOperand(1);
590
591 // Does "C op A" simplify?
592 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
593 // It simplifies to V. Form "V op B".
594 replaceOperand(I, 0, V);
595 replaceOperand(I, 1, B);
596 // Conservatively clear the optional flags, since they may not be
597 // preserved by the reassociation.
599 Changed = true;
600 ++NumReassoc;
601 continue;
602 }
603 }
604
605 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
606 if (Op1 && Op1->getOpcode() == Opcode) {
607 Value *A = I.getOperand(0);
608 Value *B = Op1->getOperand(0);
609 Value *C = Op1->getOperand(1);
610
611 // Does "C op A" simplify?
612 if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
613 // It simplifies to V. Form "B op V".
614 replaceOperand(I, 0, B);
615 replaceOperand(I, 1, V);
616 // Conservatively clear the optional flags, since they may not be
617 // preserved by the reassociation.
619 Changed = true;
620 ++NumReassoc;
621 continue;
622 }
623 }
624
625 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
626 // if C1 and C2 are constants.
627 Value *A, *B;
628 Constant *C1, *C2, *CRes;
629 if (Op0 && Op1 &&
630 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
631 match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
632 match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
633 (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
634 bool IsNUW = hasNoUnsignedWrap(I) &&
635 hasNoUnsignedWrap(*Op0) &&
636 hasNoUnsignedWrap(*Op1);
637 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
638 BinaryOperator::CreateNUW(Opcode, A, B) :
639 BinaryOperator::Create(Opcode, A, B);
640
641 if (isa<FPMathOperator>(NewBO)) {
642 FastMathFlags Flags = I.getFastMathFlags() &
643 Op0->getFastMathFlags() &
644 Op1->getFastMathFlags();
645 NewBO->setFastMathFlags(Flags);
646 }
647 InsertNewInstWith(NewBO, I.getIterator());
648 NewBO->takeName(Op1);
649 replaceOperand(I, 0, NewBO);
650 replaceOperand(I, 1, CRes);
651 // Conservatively clear the optional flags, since they may not be
652 // preserved by the reassociation.
654 if (IsNUW)
655 I.setHasNoUnsignedWrap(true);
656
657 Changed = true;
658 continue;
659 }
660 }
661
662 // No further simplifications.
663 return Changed;
664 } while (true);
665}
666
667/// Return whether "X LOp (Y ROp Z)" is always equal to
668/// "(X LOp Y) ROp (X LOp Z)".
671 // X & (Y | Z) <--> (X & Y) | (X & Z)
672 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
673 if (LOp == Instruction::And)
674 return ROp == Instruction::Or || ROp == Instruction::Xor;
675
676 // X | (Y & Z) <--> (X | Y) & (X | Z)
677 if (LOp == Instruction::Or)
678 return ROp == Instruction::And;
679
680 // X * (Y + Z) <--> (X * Y) + (X * Z)
681 // X * (Y - Z) <--> (X * Y) - (X * Z)
682 if (LOp == Instruction::Mul)
683 return ROp == Instruction::Add || ROp == Instruction::Sub;
684
685 return false;
686}
687
688/// Return whether "(X LOp Y) ROp Z" is always equal to
689/// "(X ROp Z) LOp (Y ROp Z)".
693 return leftDistributesOverRight(ROp, LOp);
694
695 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
697
698 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
699 // but this requires knowing that the addition does not overflow and other
700 // such subtleties.
701}
702
703/// This function returns identity value for given opcode, which can be used to
704/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
706 if (isa<Constant>(V))
707 return nullptr;
708
709 return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
710}
711
712/// This function predicates factorization using distributive laws. By default,
713/// it just returns the 'Op' inputs. But for special-cases like
714/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
715/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
716/// allow more factorization opportunities.
719 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
720 assert(Op && "Expected a binary operator");
721 LHS = Op->getOperand(0);
722 RHS = Op->getOperand(1);
723 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
724 Constant *C;
725 if (match(Op, m_Shl(m_Value(), m_ImmConstant(C)))) {
726 // X << C --> X * (1 << C)
728 Instruction::Shl, ConstantInt::get(Op->getType(), 1), C);
729 assert(RHS && "Constant folding of immediate constants failed");
730 return Instruction::Mul;
731 }
732 // TODO: We can add other conversions e.g. shr => div etc.
733 }
734 if (Instruction::isBitwiseLogicOp(TopOpcode)) {
735 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
737 // lshr nneg C, X --> ashr nneg C, X
738 return Instruction::AShr;
739 }
740 }
741 return Op->getOpcode();
742}
743
744/// This tries to simplify binary operations by factorizing out common terms
745/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
748 Instruction::BinaryOps InnerOpcode, Value *A,
749 Value *B, Value *C, Value *D) {
750 assert(A && B && C && D && "All values must be provided");
751
752 Value *V = nullptr;
753 Value *RetVal = nullptr;
754 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
755 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
756
757 // Does "X op' Y" always equal "Y op' X"?
758 bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
759
760 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
761 if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
762 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
763 // commutative case, "(A op' B) op (C op' A)"?
764 if (A == C || (InnerCommutative && A == D)) {
765 if (A != C)
766 std::swap(C, D);
767 // Consider forming "A op' (B op D)".
768 // If "B op D" simplifies then it can be formed with no cost.
769 V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));
770
771 // If "B op D" doesn't simplify then only go on if one of the existing
772 // operations "A op' B" and "C op' D" will be zapped as no longer used.
773 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
774 V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
775 if (V)
776 RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
777 }
778 }
779
780 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
781 if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
782 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
783 // commutative case, "(A op' B) op (B op' D)"?
784 if (B == D || (InnerCommutative && B == C)) {
785 if (B != D)
786 std::swap(C, D);
787 // Consider forming "(A op C) op' B".
788 // If "A op C" simplifies then it can be formed with no cost.
789 V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));
790
791 // If "A op C" doesn't simplify then only go on if one of the existing
792 // operations "A op' B" and "C op' D" will be zapped as no longer used.
793 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
794 V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
795 if (V)
796 RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
797 }
798 }
799
800 if (!RetVal)
801 return nullptr;
802
803 ++NumFactor;
804 RetVal->takeName(&I);
805
806 // Try to add no-overflow flags to the final value.
807 if (isa<BinaryOperator>(RetVal)) {
808 bool HasNSW = false;
809 bool HasNUW = false;
811 HasNSW = I.hasNoSignedWrap();
812 HasNUW = I.hasNoUnsignedWrap();
813 }
814 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
815 HasNSW &= LOBO->hasNoSignedWrap();
816 HasNUW &= LOBO->hasNoUnsignedWrap();
817 }
818
819 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
820 HasNSW &= ROBO->hasNoSignedWrap();
821 HasNUW &= ROBO->hasNoUnsignedWrap();
822 }
823
824 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
825 // We can propagate 'nsw' if we know that
826 // %Y = mul nsw i16 %X, C
827 // %Z = add nsw i16 %Y, %X
828 // =>
829 // %Z = mul nsw i16 %X, C+1
830 //
831 // iff C+1 isn't INT_MIN
832 const APInt *CInt;
833 if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
834 cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);
835
836 // nuw can be propagated with any constant or nuw value.
837 cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
838 }
839 }
840 return RetVal;
841}
842
843// If `I` has one Const operand and the other matches `(ctpop (not x))`,
844// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
845// This is only useful is the new subtract can fold so we only handle the
846// following cases:
847// 1) (add/sub/disjoint_or C, (ctpop (not x))
848// -> (add/sub/disjoint_or C', (ctpop x))
849// 1) (cmp pred C, (ctpop (not x))
850// -> (cmp pred C', (ctpop x))
852 unsigned Opc = I->getOpcode();
853 unsigned ConstIdx = 1;
854 switch (Opc) {
855 default:
856 return nullptr;
857 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
858 // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
859 // is constant.
860 case Instruction::Sub:
861 ConstIdx = 0;
862 break;
863 case Instruction::ICmp:
864 // Signed predicates aren't correct in some edge cases like for i2 types, as
865 // well since (ctpop x) is known [0, log2(BitWidth(x))] almost all signed
866 // comparisons against it are simplfied to unsigned.
867 if (cast<ICmpInst>(I)->isSigned())
868 return nullptr;
869 break;
870 case Instruction::Or:
871 if (!match(I, m_DisjointOr(m_Value(), m_Value())))
872 return nullptr;
873 [[fallthrough]];
874 case Instruction::Add:
875 break;
876 }
877
878 Value *Op;
879 // Find ctpop.
880 if (!match(I->getOperand(1 - ConstIdx),
882 return nullptr;
883
884 Constant *C;
885 // Check other operand is ImmConstant.
886 if (!match(I->getOperand(ConstIdx), m_ImmConstant(C)))
887 return nullptr;
888
889 Type *Ty = Op->getType();
890 Constant *BitWidthC = ConstantInt::get(Ty, Ty->getScalarSizeInBits());
891 // Need extra check for icmp. Note if this check is true, it generally means
892 // the icmp will simplify to true/false.
893 if (Opc == Instruction::ICmp && !cast<ICmpInst>(I)->isEquality()) {
894 Constant *Cmp =
896 if (!Cmp || !Cmp->isNullValue())
897 return nullptr;
898 }
899
900 // Check we can invert `(not x)` for free.
901 bool Consumes = false;
902 if (!isFreeToInvert(Op, Op->hasOneUse(), Consumes) || !Consumes)
903 return nullptr;
904 Value *NotOp = getFreelyInverted(Op, Op->hasOneUse(), &Builder);
905 assert(NotOp != nullptr &&
906 "Desync between isFreeToInvert and getFreelyInverted");
907
908 Value *CtpopOfNotOp = Builder.CreateIntrinsic(Ty, Intrinsic::ctpop, NotOp);
909
910 Value *R = nullptr;
911
912 // Do the transformation here to avoid potentially introducing an infinite
913 // loop.
914 switch (Opc) {
915 case Instruction::Sub:
916 R = Builder.CreateAdd(CtpopOfNotOp, ConstantExpr::getSub(C, BitWidthC));
917 break;
918 case Instruction::Or:
919 case Instruction::Add:
920 R = Builder.CreateSub(ConstantExpr::getAdd(C, BitWidthC), CtpopOfNotOp);
921 break;
922 case Instruction::ICmp:
923 R = Builder.CreateICmp(cast<ICmpInst>(I)->getSwappedPredicate(),
924 CtpopOfNotOp, ConstantExpr::getSub(BitWidthC, C));
925 break;
926 default:
927 llvm_unreachable("Unhandled Opcode");
928 }
929 assert(R != nullptr);
930 return replaceInstUsesWith(*I, R);
931}
932
933// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
934// IFF
935// 1) the logic_shifts match
936// 2) either both binops are binops and one is `and` or
937// BinOp1 is `and`
938// (logic_shift (inv_logic_shift C1, C), C) == C1 or
939//
940// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
941//
942// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
943// IFF
944// 1) the logic_shifts match
945// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
946//
947// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
948//
949// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
950// IFF
951// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
952// 2) Binop2 is `not`
953//
954// -> (arithmetic_shift Binop1((not X), Y), Amt)
955
957 const DataLayout &DL = I.getDataLayout();
958 auto IsValidBinOpc = [](unsigned Opc) {
959 switch (Opc) {
960 default:
961 return false;
962 case Instruction::And:
963 case Instruction::Or:
964 case Instruction::Xor:
965 case Instruction::Add:
966 // Skip Sub as we only match constant masks which will canonicalize to use
967 // add.
968 return true;
969 }
970 };
971
972 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
973 // constraints.
974 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
975 unsigned ShOpc) {
976 assert(ShOpc != Instruction::AShr);
977 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
978 ShOpc == Instruction::Shl;
979 };
980
981 auto GetInvShift = [](unsigned ShOpc) {
982 assert(ShOpc != Instruction::AShr);
983 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
984 };
985
986 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
987 unsigned ShOpc, Constant *CMask,
988 Constant *CShift) {
989 // If the BinOp1 is `and` we don't need to check the mask.
990 if (BinOpc1 == Instruction::And)
991 return true;
992
993 // For all other possible transfers we need complete distributable
994 // binop/shift (anything but `add` + `lshr`).
995 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
996 return false;
997
998 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
999 // vecs, otherwise the mask will be simplified and the following check will
1000 // handle it).
1001 if (BinOpc2 == Instruction::And)
1002 return true;
1003
1004 // Otherwise, need mask that meets the below requirement.
1005 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
1006 Constant *MaskInvShift =
1007 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1008 return ConstantFoldBinaryOpOperands(ShOpc, MaskInvShift, CShift, DL) ==
1009 CMask;
1010 };
1011
1012 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
1013 Constant *CMask, *CShift;
1014 Value *X, *Y, *ShiftedX, *Mask, *Shift;
1015 if (!match(I.getOperand(ShOpnum),
1016 m_OneUse(m_Shift(m_Value(Y), m_Value(Shift)))))
1017 return nullptr;
1018 if (!match(I.getOperand(1 - ShOpnum),
1020 m_OneUse(m_Shift(m_Value(X), m_Specific(Shift))),
1021 m_Value(ShiftedX)),
1022 m_Value(Mask))))
1023 return nullptr;
1024 // Make sure we are matching instruction shifts and not ConstantExpr
1025 auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
1026 auto *IX = dyn_cast<Instruction>(ShiftedX);
1027 if (!IY || !IX)
1028 return nullptr;
1029
1030 // LHS and RHS need same shift opcode
1031 unsigned ShOpc = IY->getOpcode();
1032 if (ShOpc != IX->getOpcode())
1033 return nullptr;
1034
1035 // Make sure binop is real instruction and not ConstantExpr
1036 auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
1037 if (!BO2)
1038 return nullptr;
1039
1040 unsigned BinOpc = BO2->getOpcode();
1041 // Make sure we have valid binops.
1042 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
1043 return nullptr;
1044
1045 if (ShOpc == Instruction::AShr) {
1046 if (Instruction::isBitwiseLogicOp(I.getOpcode()) &&
1047 BinOpc == Instruction::Xor && match(Mask, m_AllOnes())) {
1048 Value *NotX = Builder.CreateNot(X);
1049 Value *NewBinOp = Builder.CreateBinOp(I.getOpcode(), Y, NotX);
1051 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp, Shift);
1052 }
1053
1054 return nullptr;
1055 }
1056
1057 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
1058 // distribute to drop the shift irrelevant of constants.
1059 if (BinOpc == I.getOpcode() &&
1060 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
1061 Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
1062 Value *NewBinOp1 = Builder.CreateBinOp(
1063 static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
1064 return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
1065 }
1066
1067 // Otherwise we can only distribute by constant shifting the mask, so
1068 // ensure we have constants.
1069 if (!match(Shift, m_ImmConstant(CShift)))
1070 return nullptr;
1071 if (!match(Mask, m_ImmConstant(CMask)))
1072 return nullptr;
1073
1074 // Check if we can distribute the binops.
1075 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
1076 return nullptr;
1077
1078 Constant *NewCMask =
1079 ConstantFoldBinaryOpOperands(GetInvShift(ShOpc), CMask, CShift, DL);
1080 Value *NewBinOp2 = Builder.CreateBinOp(
1081 static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
1082 Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
1083 return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
1084 NewBinOp1, CShift);
1085 };
1086
1087 if (Instruction *R = MatchBinOp(0))
1088 return R;
1089 return MatchBinOp(1);
1090}
1091
1092// (Binop (zext C), (select C, T, F))
1093// -> (select C, (binop 1, T), (binop 0, F))
1094//
1095// (Binop (sext C), (select C, T, F))
1096// -> (select C, (binop -1, T), (binop 0, F))
1097//
1098// Attempt to simplify binary operations into a select with folded args, when
1099// one operand of the binop is a select instruction and the other operand is a
1100// zext/sext extension, whose value is the select condition.
1103 // TODO: this simplification may be extended to any speculatable instruction,
1104 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1105 Instruction::BinaryOps Opc = I.getOpcode();
1106 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1107 Value *A, *CondVal, *TrueVal, *FalseVal;
1108 Value *CastOp;
1109 Constant *CastTrueVal, *CastFalseVal;
1110
1111 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1112 return match(CastOp, m_SelectLike(m_Value(A), m_Constant(CastTrueVal),
1113 m_Constant(CastFalseVal))) &&
1114 match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
1115 m_Value(FalseVal)));
1116 };
1117
1118 // Make sure one side of the binop is a select instruction, and the other is a
1119 // zero/sign extension operating on a i1.
1120 if (MatchSelectAndCast(LHS, RHS))
1121 CastOp = LHS;
1122 else if (MatchSelectAndCast(RHS, LHS))
1123 CastOp = RHS;
1124 else
1125 return nullptr;
1126
1128 ? nullptr
1129 : cast<SelectInst>(CastOp == LHS ? RHS : LHS);
1130
1131 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1132 bool IsCastOpRHS = (CastOp == RHS);
1133 Value *CastVal = IsTrueArm ? CastFalseVal : CastTrueVal;
1134
1135 return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, CastVal)
1136 : Builder.CreateBinOp(Opc, CastVal, V);
1137 };
1138
1139 // If the value used in the zext/sext is the select condition, or the negated
1140 // of the select condition, the binop can be simplified.
1141 if (CondVal == A) {
1142 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1143 return SelectInst::Create(CondVal, NewTrueVal,
1144 NewFoldedConst(true, FalseVal), "", nullptr, SI);
1145 }
1146 if (match(A, m_Not(m_Specific(CondVal)))) {
1147 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1148 return SelectInst::Create(CondVal, NewTrueVal,
1149 NewFoldedConst(false, FalseVal), "", nullptr, SI);
1150 }
1151
1152 return nullptr;
1153}
1154
1156 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1159 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1160 Value *A, *B, *C, *D;
1161 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1162
1163 if (Op0)
1164 LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B, Op1);
1165 if (Op1)
1166 RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D, Op0);
1167
1168 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1169 // a common term.
1170 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1171 if (Value *V = tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, D))
1172 return V;
1173
1174 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1175 // term.
1176 if (Op0)
1177 if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
1178 if (Value *V =
1179 tryFactorization(I, SQ, Builder, LHSOpcode, A, B, RHS, Ident))
1180 return V;
1181
1182 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1183 // term.
1184 if (Op1)
1185 if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
1186 if (Value *V =
1187 tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D))
1188 return V;
1189
1190 return nullptr;
1191}
1192
1193/// This tries to simplify binary operations which some other binary operation
1194/// distributes over either by factorizing out common terms
1195/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1196/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1197/// Returns the simplified value, or null if it didn't simplify.
1199 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
1202 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1203
1204 // Factorization.
1205 if (Value *R = tryFactorizationFolds(I))
1206 return R;
1207
1208 // Expansion.
1209 if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
1210 // The instruction has the form "(A op' B) op C". See if expanding it out
1211 // to "(A op C) op' (B op C)" results in simplifications.
1212 Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
1213 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1214
1215 // Disable the use of undef because it's not safe to distribute undef.
1216 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1217 Value *L = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1218 Value *R = simplifyBinOp(TopLevelOpcode, B, C, SQDistributive);
1219
1220 // Do "A op C" and "B op C" both simplify?
1221 if (L && R) {
1222 // They do! Return "L op' R".
1223 ++NumExpand;
1224 C = Builder.CreateBinOp(InnerOpcode, L, R);
1225 C->takeName(&I);
1226 return C;
1227 }
1228
1229 // Does "A op C" simplify to the identity value for the inner opcode?
1230 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1231 // They do! Return "B op C".
1232 ++NumExpand;
1233 C = Builder.CreateBinOp(TopLevelOpcode, B, C);
1234 C->takeName(&I);
1235 return C;
1236 }
1237
1238 // Does "B op C" simplify to the identity value for the inner opcode?
1239 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1240 // They do! Return "A op C".
1241 ++NumExpand;
1242 C = Builder.CreateBinOp(TopLevelOpcode, A, C);
1243 C->takeName(&I);
1244 return C;
1245 }
1246 }
1247
1248 if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
1249 // The instruction has the form "A op (B op' C)". See if expanding it out
1250 // to "(A op B) op' (A op C)" results in simplifications.
1251 Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
1252 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1253
1254 // Disable the use of undef because it's not safe to distribute undef.
1255 auto SQDistributive = SQ.getWithInstruction(&I).getWithoutUndef();
1256 Value *L = simplifyBinOp(TopLevelOpcode, A, B, SQDistributive);
1257 Value *R = simplifyBinOp(TopLevelOpcode, A, C, SQDistributive);
1258
1259 // Do "A op B" and "A op C" both simplify?
1260 if (L && R) {
1261 // They do! Return "L op' R".
1262 ++NumExpand;
1263 A = Builder.CreateBinOp(InnerOpcode, L, R);
1264 A->takeName(&I);
1265 return A;
1266 }
1267
1268 // Does "A op B" simplify to the identity value for the inner opcode?
1269 if (L && L == ConstantExpr::getBinOpIdentity(InnerOpcode, L->getType())) {
1270 // They do! Return "A op C".
1271 ++NumExpand;
1272 A = Builder.CreateBinOp(TopLevelOpcode, A, C);
1273 A->takeName(&I);
1274 return A;
1275 }
1276
1277 // Does "A op C" simplify to the identity value for the inner opcode?
1278 if (R && R == ConstantExpr::getBinOpIdentity(InnerOpcode, R->getType())) {
1279 // They do! Return "A op B".
1280 ++NumExpand;
1281 A = Builder.CreateBinOp(TopLevelOpcode, A, B);
1282 A->takeName(&I);
1283 return A;
1284 }
1285 }
1286
1287 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1288}
1289
1290static std::optional<std::pair<Value *, Value *>>
1292 if (LHS->getParent() != RHS->getParent())
1293 return std::nullopt;
1294
1295 if (LHS->getNumIncomingValues() < 2)
1296 return std::nullopt;
1297
1298 if (!equal(LHS->blocks(), RHS->blocks()))
1299 return std::nullopt;
1300
1301 Value *L0 = LHS->getIncomingValue(0);
1302 Value *R0 = RHS->getIncomingValue(0);
1303
1304 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1305 Value *L1 = LHS->getIncomingValue(I);
1306 Value *R1 = RHS->getIncomingValue(I);
1307
1308 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1309 continue;
1310
1311 return std::nullopt;
1312 }
1313
1314 return std::optional(std::pair(L0, R0));
1315}
1316
/// Try to prove that LHS and RHS compute symmetric functions of the same two
/// values — e.g. two selects of the same condition with swapped arms, or a
/// min/max intrinsic pair over the same operands. On success returns that
/// underlying value pair; otherwise std::nullopt.
1317std::optional<std::pair<Value *, Value *>>
1318InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
 // Both sides must be instructions with the same opcode.
1321 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1322 return std::nullopt;
1323 switch (LHSInst->getOpcode()) {
 // PHI pair: NOTE(review) — handled on a line elided from this view,
 // presumably by delegating to matchSymmetricPhiNodesPair; confirm upstream.
1324 case Instruction::PHI:
 // Select pair: symmetric iff they share the condition with swapped arms,
 // i.e. (select C, A, B) vs (select C, B, A).
1326 case Instruction::Select: {
1327 Value *Cond = LHSInst->getOperand(0);
1328 Value *TrueVal = LHSInst->getOperand(1);
1329 Value *FalseVal = LHSInst->getOperand(2);
1330 if (Cond == RHSInst->getOperand(0) && TrueVal == RHSInst->getOperand(2) &&
1331 FalseVal == RHSInst->getOperand(1))
1332 return std::pair(TrueVal, FalseVal);
1333 return std::nullopt;
1334 }
1335 case Instruction::Call: {
1336 // Match min(a, b) and max(a, b)
1337 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(LHSInst);
1338 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(RHSInst);
 // The operands may appear in either order on the RHS intrinsic.
 // NOTE(review): the predicate comparison continues on a line elided from
 // this view — confirm the exact relation against upstream.
1339 if (LHSMinMax && RHSMinMax &&
1340 LHSMinMax->getPredicate() ==
1342 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1343 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1344 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1345 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1346 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1347 return std::nullopt;
1348 }
1349 default:
1350 return std::nullopt;
1351 }
1352}
1353
1355 Value *LHS,
1356 Value *RHS) {
1357 Value *A, *B, *C, *D, *E, *F;
1358 bool LHSIsSelect = match(LHS, m_Select(m_Value(A), m_Value(B), m_Value(C)));
1359 bool RHSIsSelect = match(RHS, m_Select(m_Value(D), m_Value(E), m_Value(F)));
1360 if (!LHSIsSelect && !RHSIsSelect)
1361 return nullptr;
1362
1364 ? nullptr
1365 : cast<SelectInst>(LHSIsSelect ? LHS : RHS);
1366
1367 FastMathFlags FMF;
1369 if (const auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
1370 FMF = FPOp->getFastMathFlags();
1371 Builder.setFastMathFlags(FMF);
1372 }
1373
1374 Instruction::BinaryOps Opcode = I.getOpcode();
1375 SimplifyQuery Q = SQ.getWithInstruction(&I);
1376
1377 Value *Cond, *True = nullptr, *False = nullptr;
1378
1379 // Special-case for add/negate combination. Replace the zero in the negation
1380 // with the trailing add operand:
1381 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1382 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1383 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1384 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1385 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1386 return nullptr;
1387 Value *N;
1388 if (True && match(FVal, m_Neg(m_Value(N)))) {
1389 Value *Sub = Builder.CreateSub(Z, N);
1390 return Builder.CreateSelect(Cond, True, Sub, I.getName(), SI);
1391 }
1392 if (False && match(TVal, m_Neg(m_Value(N)))) {
1393 Value *Sub = Builder.CreateSub(Z, N);
1394 return Builder.CreateSelect(Cond, Sub, False, I.getName(), SI);
1395 }
1396 return nullptr;
1397 };
1398
1399 if (LHSIsSelect && RHSIsSelect && A == D) {
1400 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1401 Cond = A;
1402 True = simplifyBinOp(Opcode, B, E, FMF, Q);
1403 False = simplifyBinOp(Opcode, C, F, FMF, Q);
1404
1405 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1406 if (False && !True)
1407 True = Builder.CreateBinOp(Opcode, B, E);
1408 else if (True && !False)
1409 False = Builder.CreateBinOp(Opcode, C, F);
1410 }
1411 } else if (LHSIsSelect && LHS->hasOneUse()) {
1412 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1413 Cond = A;
1414 True = simplifyBinOp(Opcode, B, RHS, FMF, Q);
1415 False = simplifyBinOp(Opcode, C, RHS, FMF, Q);
1416 if (Value *NewSel = foldAddNegate(B, C, RHS))
1417 return NewSel;
1418 } else if (RHSIsSelect && RHS->hasOneUse()) {
1419 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1420 Cond = D;
1421 True = simplifyBinOp(Opcode, LHS, E, FMF, Q);
1422 False = simplifyBinOp(Opcode, LHS, F, FMF, Q);
1423 if (Value *NewSel = foldAddNegate(E, F, LHS))
1424 return NewSel;
1425 }
1426
1427 if (!True || !False)
1428 return nullptr;
1429
1430 Value *NewSI = Builder.CreateSelect(Cond, True, False, I.getName(), SI);
1431 NewSI->takeName(&I);
1432 return NewSI;
1433}
1434
1435/// Freely adapt every user of V as-if V was changed to !V.
1436/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1438 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1439 for (User *U : make_early_inc_range(I->users())) {
1440 if (U == IgnoredUser)
1441 continue; // Don't consider this user.
1442 switch (cast<Instruction>(U)->getOpcode()) {
1443 case Instruction::Select: {
1444 auto *SI = cast<SelectInst>(U);
1445 SI->swapValues();
1446 SI->swapProfMetadata();
1447 break;
1448 }
1449 case Instruction::CondBr: {
1451 BI->swapSuccessors(); // swaps prof metadata too
1452 if (BPI)
1453 BPI->swapSuccEdgesProbabilities(BI->getParent());
1454 break;
1455 }
1456 case Instruction::Xor:
1458 // Add to worklist for DCE.
1460 break;
1461 default:
1462 llvm_unreachable("Got unexpected user - out of sync with "
1463 "canFreelyInvertAllUsersOf() ?");
1464 }
1465 }
1466
1467 // Update pre-existing debug value uses.
1468 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1469 llvm::findDbgValues(I, DbgVariableRecords);
1470
1471 for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
1472 SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
1473 for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
1474 Idx != End; ++Idx)
1475 if (DbgVal->getVariableLocationOp(Idx) == I)
1476 DbgVal->setExpression(
1477 DIExpression::appendOpsToArg(DbgVal->getExpression(), Ops, Idx));
1478 }
1479}
1480
1481/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1482/// constant zero (which is the 'negate' form).
1483Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1484 Value *NegV;
 // Direct structural match: V is a negation of NegV (sub 0, NegV).
1485 if (match(V, m_Neg(m_Value(NegV))))
1486 return NegV;
1487
1488 // Constants can be considered to be negated values if they can be folded.
 // NOTE(review): the guard binding C here sits on a line elided from this
 // view — confirm against upstream before editing.
1490 return ConstantExpr::getNeg(C);
1491
 // NOTE(review): guard elided; C appears to be a constant sequential type
 // whose element type must be integral for the negation to fold — confirm.
1493 if (C->getType()->getElementType()->isIntegerTy())
1494 return ConstantExpr::getNeg(C);
1495
 // Element-wise scan of a constant vector: undef lanes are acceptable, but
 // every defined lane must be a ConstantInt so getNeg is guaranteed to fold.
1497 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1498 Constant *Elt = CV->getAggregateElement(i);
1499 if (!Elt)
1500 return nullptr;
1501
1502 if (isa<UndefValue>(Elt))
1503 continue;
1504
1505 if (!isa<ConstantInt>(Elt))
1506 return nullptr;
1507 }
1508 return ConstantExpr::getNeg(CV);
1509 }
1510
1511 // Negate integer vector splats.
1512 if (auto *CV = dyn_cast<Constant>(V))
1513 if (CV->getType()->isVectorTy() &&
1514 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1515 return ConstantExpr::getNeg(CV);
1516
 // Not a recognizable negation.
1517 return nullptr;
1518}
1519
1520// Try to fold:
1521// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1522// -> ({s|u}itofp (int_binop x, y))
1523// 2) (fp_binop ({s|u}itofp x), FpC)
1524// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1525//
1526// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1527Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1528 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1530
1531 Type *FPTy = BO.getType();
1532 Type *IntTy = IntOps[0]->getType();
1533
1534 unsigned IntSz = IntTy->getScalarSizeInBits();
1535 // This is the maximum number of inuse bits by the integer where the int -> fp
1536 // casts are exact.
1537 unsigned MaxRepresentableBits =
1539
1540 // Preserve known number of leading bits. This can allow us to trivial nsw/nuw
1541 // checks later on.
1542 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1543
1544 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1545 // cache if between calls to `foldFBinOpOfIntCastsFromSign`.
1546 auto IsNonZero = [&](unsigned OpNo) -> bool {
1547 if (OpsKnown[OpNo].hasKnownBits() &&
1548 OpsKnown[OpNo].getKnownBits(SQ).isNonZero())
1549 return true;
1550 return isKnownNonZero(IntOps[OpNo], SQ);
1551 };
1552
1553 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1554 // NB: This matches the impl in ValueTracking, we just try to use cached
1555 // knownbits here. If we ever start supporting WithCache for
1556 // `isKnownNonNegative`, change this to an explicit call.
1557 return OpsKnown[OpNo].getKnownBits(SQ).isNonNegative();
1558 };
1559
1560 // Check if we know for certain that ({s|u}itofp op) is exact.
1561 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1562 // Can we treat this operand as the desired sign?
1563 if (OpsFromSigned != isa<SIToFPInst>(BO.getOperand(OpNo)) &&
1564 !IsNonNeg(OpNo))
1565 return false;
1566
1567 // If fp precision >= bitwidth(op) then its exact.
1568 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1569 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1570 // handled specially. We can't, however, increase the bound arbitrarily for
1571 // `sitofp` as for larger sizes, it won't sign extend.
1572 if (MaxRepresentableBits < IntSz) {
1573 // Otherwise if its signed cast check that fp precisions >= bitwidth(op) -
1574 // numSignBits(op).
1575 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1576 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1577 if (OpsFromSigned)
1578 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(IntOps[OpNo]);
1579 // Finally for unsigned check that fp precision >= bitwidth(op) -
1580 // numLeadingZeros(op).
1581 else {
1582 NumUsedLeadingBits[OpNo] =
1583 IntSz - OpsKnown[OpNo].getKnownBits(SQ).countMinLeadingZeros();
1584 }
1585 }
1586 // NB: We could also check if op is known to be a power of 2 or zero (which
1587 // will always be representable). Its unlikely, however, that is we are
1588 // unable to bound op in any way we will be able to pass the overflow checks
1589 // later on.
1590
1591 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1592 return false;
1593 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1594 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1595 IsNonZero(OpNo);
1596 };
1597
1598 // If we have a constant rhs, see if we can losslessly convert it to an int.
1599 if (Op1FpC != nullptr) {
1600 // Signed + Mul req non-zero
1601 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1602 !match(Op1FpC, m_NonZeroFP()))
1603 return nullptr;
1604
1606 OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, Op1FpC,
1607 IntTy, DL);
1608 if (Op1IntC == nullptr)
1609 return nullptr;
1610 if (ConstantFoldCastOperand(OpsFromSigned ? Instruction::SIToFP
1611 : Instruction::UIToFP,
1612 Op1IntC, FPTy, DL) != Op1FpC)
1613 return nullptr;
1614
1615 // First try to keep sign of cast the same.
1616 IntOps[1] = Op1IntC;
1617 }
1618
1619 // Ensure lhs/rhs integer types match.
1620 if (IntTy != IntOps[1]->getType())
1621 return nullptr;
1622
1623 if (Op1FpC == nullptr) {
1624 if (!IsValidPromotion(1))
1625 return nullptr;
1626 }
1627 if (!IsValidPromotion(0))
1628 return nullptr;
1629
1630 // Final we check if the integer version of the binop will not overflow.
1632 // Because of the precision check, we can often rule out overflows.
1633 bool NeedsOverflowCheck = true;
1634 // Try to conservatively rule out overflow based on the already done precision
1635 // checks.
1636 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1637 unsigned OverflowMaxCurBits =
1638 std::max(NumUsedLeadingBits[0], NumUsedLeadingBits[1]);
1639 bool OutputSigned = OpsFromSigned;
1640 switch (BO.getOpcode()) {
1641 case Instruction::FAdd:
1642 IntOpc = Instruction::Add;
1643 OverflowMaxOutputBits += OverflowMaxCurBits;
1644 break;
1645 case Instruction::FSub:
1646 IntOpc = Instruction::Sub;
1647 OverflowMaxOutputBits += OverflowMaxCurBits;
1648 break;
1649 case Instruction::FMul:
1650 IntOpc = Instruction::Mul;
1651 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1652 break;
1653 default:
1654 llvm_unreachable("Unsupported binop");
1655 }
1656 // The precision check may have already ruled out overflow.
1657 if (OverflowMaxOutputBits < IntSz) {
1658 NeedsOverflowCheck = false;
1659 // We can bound unsigned overflow from sub to in range signed value (this is
1660 // what allows us to avoid the overflow check for sub).
1661 if (IntOpc == Instruction::Sub)
1662 OutputSigned = true;
1663 }
1664
1665 // Precision check did not rule out overflow, so need to check.
1666 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1667 // `IntOps[...]` arguments to `KnownOps[...]`.
1668 if (NeedsOverflowCheck &&
1669 !willNotOverflow(IntOpc, IntOps[0], IntOps[1], BO, OutputSigned))
1670 return nullptr;
1671
1672 Value *IntBinOp = Builder.CreateBinOp(IntOpc, IntOps[0], IntOps[1]);
1673 if (auto *IntBO = dyn_cast<BinaryOperator>(IntBinOp)) {
1674 IntBO->setHasNoSignedWrap(OutputSigned);
1675 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1676 }
1677 if (OutputSigned)
1678 return new SIToFPInst(IntBinOp, FPTy);
1679 return new UIToFPInst(IntBinOp, FPTy);
1680}
1681
1682// Try to fold:
1683// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1684// -> ({s|u}itofp (int_binop x, y))
1685// 2) (fp_binop ({s|u}itofp x), FpC)
1686// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1687Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1688 // Don't perform the fold on vectors, as the integer operation may be much
1689 // more expensive than the float operation in that case.
1690 if (BO.getType()->isVectorTy())
1691 return nullptr;
1692
1693 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1694 Constant *Op1FpC = nullptr;
1695 // Check for:
1696 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1697 // 2) (binop ({s|u}itofp x), FpC)
1698 if (!match(BO.getOperand(0), m_IToFP(m_Value(IntOps[0]))))
1699 return nullptr;
1700
1701 if (!match(BO.getOperand(1), m_Constant(Op1FpC)) &&
1702 !match(BO.getOperand(1), m_IToFP(m_Value(IntOps[1]))))
1703 return nullptr;
1704
1705 // Cache KnownBits a bit to potentially save some analysis.
1706 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1707
1708 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1709 // different constraints depending on the sign of the cast.
1710 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1711 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1712 IntOps, Op1FpC, OpsKnown))
1713 return R;
1714 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1715 Op1FpC, OpsKnown);
1716}
1717
1718/// A binop with a constant operand and a sign-extended boolean operand may be
1719/// converted into a select of constants by applying the binary operation to
1720/// the constant with the two possible values of the extended boolean (0 or -1).
1721Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1722 // TODO: Handle non-commutative binop (constant is operand 0).
1723 // TODO: Handle zext.
1724 // TODO: Peek through 'not' of cast.
1725 Value *BO0 = BO.getOperand(0);
1726 Value *BO1 = BO.getOperand(1);
1727 Value *X;
1728 Constant *C;
 // Require (binop (sext i1 X), ImmC). A bool (i1 or vector-of-i1) source
 // means the sext yields exactly 0 or -1, so both select arms below can be
 // constant-folded by the builder.
1729 if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
1730 !X->getType()->isIntOrIntVectorTy(1))
1731 return nullptr;
1732
1733 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
 // NOTE(review): Ones/Zero are defined on lines elided from this view —
 // presumably the all-ones and null constants of BO's type; confirm.
1736 Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
1737 Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
 // No branch-weight information is available for the new select.
1738 return createSelectInstWithUnknownProfile(X, TVal, FVal);
1739}
1740
1742 bool IsTrueArm) {
1744 for (Value *Op : I.operands()) {
1745 Value *V = nullptr;
1746 if (Op == SI) {
1747 V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
1748 } else if (match(SI->getCondition(),
1751 m_Specific(Op), m_Value(V))) &&
1753 // Pass
1754 } else if (match(Op, m_ZExt(m_Specific(SI->getCondition())))) {
1755 V = IsTrueArm ? ConstantInt::get(Op->getType(), 1)
1756 : ConstantInt::getNullValue(Op->getType());
1757 } else {
1758 V = Op;
1759 }
1760 Ops.push_back(V);
1761 }
1762
1763 return simplifyInstructionWithOperands(&I, Ops, I.getDataLayout());
1764}
1765
1767 Value *NewOp, InstCombiner &IC) {
1768 Instruction *Clone = I.clone();
1769 Clone->replaceUsesOfWith(SI, NewOp);
1771 IC.InsertNewInstBefore(Clone, I.getIterator());
1772 return Clone;
1773}
1774
1776 bool FoldWithMultiUse,
1777 bool SimplifyBothArms) {
1778 // Don't modify shared select instructions unless set FoldWithMultiUse
1779 if (!SI->hasOneUser() && !FoldWithMultiUse)
1780 return nullptr;
1781
1782 Value *TV = SI->getTrueValue();
1783 Value *FV = SI->getFalseValue();
1784
1785 // Bool selects with constant operands can be folded to logical ops.
1786 if (SI->getType()->isIntOrIntVectorTy(1))
1787 return nullptr;
1788
1789 // Avoid breaking min/max reduction pattern,
1790 // which is necessary for vectorization later.
1792 for (Value *IntrinOp : Op.operands())
1793 if (auto *PN = dyn_cast<PHINode>(IntrinOp))
1794 for (Value *PhiOp : PN->operands())
1795 if (PhiOp == &Op)
1796 return nullptr;
1797
1798 // Test if a FCmpInst instruction is used exclusively by a select as
1799 // part of a minimum or maximum operation. If so, refrain from doing
1800 // any other folding. This helps out other analyses which understand
1801 // non-obfuscated minimum and maximum idioms. And in this case, at
1802 // least one of the comparison operands has at least one user besides
1803 // the compare (the select), which would often largely negate the
1804 // benefit of folding anyway.
1805 if (auto *CI = dyn_cast<FCmpInst>(SI->getCondition())) {
1806 if (CI->hasOneUse()) {
1807 Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1808 if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
1809 !CI->isCommutative())
1810 return nullptr;
1811 }
1812 }
1813
1814 // Make sure that one of the select arms folds successfully.
1815 Value *NewTV = simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/true);
1816 Value *NewFV =
1817 simplifyOperationIntoSelectOperand(Op, SI, /*IsTrueArm=*/false);
1818 if (!NewTV && !NewFV)
1819 return nullptr;
1820
1821 if (SimplifyBothArms && !(NewTV && NewFV))
1822 return nullptr;
1823
1824 // Create an instruction for the arm that did not fold.
1825 if (!NewTV)
1826 NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
1827 if (!NewFV)
1828 NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
1829 return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
1830}
1831
1833 Value *InValue, BasicBlock *InBB,
1834 const DataLayout &DL,
1835 const SimplifyQuery SQ) {
1836 // NB: It is a precondition of this transform that the operands be
1837 // phi translatable!
1839 for (Value *Op : I.operands()) {
1840 if (Op == PN)
1841 Ops.push_back(InValue);
1842 else
1843 Ops.push_back(Op->DoPHITranslation(PN->getParent(), InBB));
1844 }
1845
1846 // Don't consider the simplification successful if we get back a constant
1847 // expression. That's just an instruction in hiding.
1848 // Also reject the case where we simplify back to the phi node. We wouldn't
1849 // be able to remove it in that case.
1851 &I, Ops, SQ.getWithInstruction(InBB->getTerminator()));
1852 if (NewVal && NewVal != PN && !match(NewVal, m_ConstantExpr()))
1853 return NewVal;
1854
1855 // Check if incoming PHI value can be replaced with constant
1856 // based on implied condition.
1857 CondBrInst *TerminatorBI = dyn_cast<CondBrInst>(InBB->getTerminator());
1858 const ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
1859 if (TerminatorBI &&
1860 TerminatorBI->getSuccessor(0) != TerminatorBI->getSuccessor(1) && ICmp) {
1861 bool LHSIsTrue = TerminatorBI->getSuccessor(0) == PN->getParent();
1862 std::optional<bool> ImpliedCond = isImpliedCondition(
1863 TerminatorBI->getCondition(), ICmp->getCmpPredicate(), Ops[0], Ops[1],
1864 DL, LHSIsTrue);
1865 if (ImpliedCond)
1866 return ConstantInt::getBool(I.getType(), ImpliedCond.value());
1867 }
1868
1869 return nullptr;
1870}
1871
1872/// In some cases it is beneficial to fold a select into a binary operator.
1873/// For example:
1874/// %1 = or %in, 4
1875/// %2 = select %cond, %1, %in
1876/// %3 = or %2, 1
1877/// =>
1878/// %1 = select i1 %cond, 5, 1
1879/// %2 = or %1, %in
1881 assert(Op.isAssociative() && "The operation must be associative!");
1882
1883 SelectInst *SI = dyn_cast<SelectInst>(Op.getOperand(0));
1884
1885 Constant *Const;
1886 if (!SI || !match(Op.getOperand(1), m_ImmConstant(Const)) ||
1887 !Op.hasOneUse() || !SI->hasOneUse())
1888 return nullptr;
1889
1890 Value *TV = SI->getTrueValue();
1891 Value *FV = SI->getFalseValue();
1892 Value *Input, *NewTV, *NewFV;
1893 Constant *Const2;
1894
1895 if (TV->hasOneUse() && match(TV, m_BinOp(Op.getOpcode(), m_Specific(FV),
1896 m_ImmConstant(Const2)))) {
1897 NewTV = ConstantFoldBinaryInstruction(Op.getOpcode(), Const, Const2);
1898 NewFV = Const;
1899 Input = FV;
1900 } else if (FV->hasOneUse() &&
1901 match(FV, m_BinOp(Op.getOpcode(), m_Specific(TV),
1902 m_ImmConstant(Const2)))) {
1903 NewTV = Const;
1904 NewFV = ConstantFoldBinaryInstruction(Op.getOpcode(), Const, Const2);
1905 Input = TV;
1906 } else
1907 return nullptr;
1908
1909 if (!NewTV || !NewFV)
1910 return nullptr;
1911
1912 Value *NewSI =
1913 Builder.CreateSelect(SI->getCondition(), NewTV, NewFV, "",
1914 ProfcheckDisableMetadataFixes ? nullptr : SI);
1915 return BinaryOperator::Create(Op.getOpcode(), NewSI, Input);
1916}
1917
1919 bool AllowMultipleUses) {
1920 unsigned NumPHIValues = PN->getNumIncomingValues();
1921 if (NumPHIValues == 0)
1922 return nullptr;
1923
1924 // We normally only transform phis with a single use. However, if a PHI has
1925 // multiple uses and they are all the same operation, we can fold *all* of the
1926 // uses into the PHI.
1927 bool OneUse = PN->hasOneUse();
1928 bool IdenticalUsers = false;
1929 if (!AllowMultipleUses && !OneUse) {
1930 // Walk the use list for the instruction, comparing them to I.
1931 for (User *U : PN->users()) {
1933 if (UI != &I && !I.isIdenticalTo(UI))
1934 return nullptr;
1935 }
1936 // Otherwise, we can replace *all* users with the new PHI we form.
1937 IdenticalUsers = true;
1938 }
1939
1940 // Check that all operands are phi-translatable.
1941 for (Value *Op : I.operands()) {
1942 if (Op == PN)
1943 continue;
1944
1945 // Non-instructions never require phi-translation.
1946 auto *I = dyn_cast<Instruction>(Op);
1947 if (!I)
1948 continue;
1949
1950 // Phi-translate can handle phi nodes in the same block.
1951 if (isa<PHINode>(I))
1952 if (I->getParent() == PN->getParent())
1953 continue;
1954
1955 // Operand dominates the block, no phi-translation necessary.
1956 if (DT.dominates(I, PN->getParent()))
1957 continue;
1958
1959 // Not phi-translatable, bail out.
1960 return nullptr;
1961 }
1962
1963 // Check to see whether the instruction can be folded into each phi operand.
1964 // If there is one operand that does not fold, remember the BB it is in.
1965 SmallVector<Value *> NewPhiValues;
1966 SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1967 bool SeenNonSimplifiedInVal = false;
1968 for (unsigned i = 0; i != NumPHIValues; ++i) {
1969 Value *InVal = PN->getIncomingValue(i);
1970 BasicBlock *InBB = PN->getIncomingBlock(i);
1971
1972 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InVal, InBB, DL, SQ)) {
1973 NewPhiValues.push_back(NewVal);
1974 continue;
1975 }
1976
1977 // Handle some cases that can't be fully simplified, but where we know that
1978 // the two instructions will fold into one.
1979 auto WillFold = [&]() {
1980 if (!InVal->hasUseList() || !InVal->hasOneUser())
1981 return false;
1982
1983 // icmp of ucmp/scmp with constant will fold to icmp.
1984 const APInt *Ignored;
1985 if (isa<CmpIntrinsic>(InVal) &&
1986 match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored))))
1987 return true;
1988
1989 // icmp eq zext(bool), 0 will fold to !bool.
1990 if (isa<ZExtInst>(InVal) &&
1991 cast<ZExtInst>(InVal)->getSrcTy()->isIntOrIntVectorTy(1) &&
1992 match(&I,
1994 return true;
1995
1996 return false;
1997 };
1998
1999 if (WillFold()) {
2000 OpsToMoveUseToIncomingBB.push_back(i);
2001 NewPhiValues.push_back(nullptr);
2002 continue;
2003 }
2004
2005 if (!OneUse && !IdenticalUsers)
2006 return nullptr;
2007
2008 if (SeenNonSimplifiedInVal)
2009 return nullptr; // More than one non-simplified value.
2010 SeenNonSimplifiedInVal = true;
2011
2012 // If there is exactly one non-simplified value, we can insert a copy of the
2013 // operation in that block. However, if this is a critical edge, we would
2014 // be inserting the computation on some other paths (e.g. inside a loop).
2015 // Only do this if the pred block is unconditionally branching into the phi
2016 // block. Also, make sure that the pred block is not dead code.
2018 if (!BI || !DT.isReachableFromEntry(InBB))
2019 return nullptr;
2020
2021 NewPhiValues.push_back(nullptr);
2022 OpsToMoveUseToIncomingBB.push_back(i);
2023
2024 // Do not push the operation across a loop backedge. This could result in
2025 // an infinite combine loop, and is generally non-profitable (especially
2026 // if the operation was originally outside the loop).
2027 if (isBackEdge(InBB, PN->getParent()))
2028 return nullptr;
2029 }
2030
2031 // Clone the instruction that uses the phi node and move it into the incoming
2032 // BB because we know that the next iteration of InstCombine will simplify it.
2034 for (auto OpIndex : OpsToMoveUseToIncomingBB) {
2036 BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);
2037
2038 Instruction *Clone = Clones.lookup(OpBB);
2039 if (!Clone) {
2040 Clone = I.clone();
2041 for (Use &U : Clone->operands()) {
2042 if (U == PN)
2043 U = Op;
2044 else
2045 U = U->DoPHITranslation(PN->getParent(), OpBB);
2046 }
2047 Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
2048 Clones.insert({OpBB, Clone});
2049 // We may have speculated the instruction.
2051 }
2052
2053 NewPhiValues[OpIndex] = Clone;
2054 }
2055
2056 // Okay, we can do the transformation: create the new PHI node.
2057 PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
2058 InsertNewInstBefore(NewPN, PN->getIterator());
2059 NewPN->takeName(PN);
2060 NewPN->setDebugLoc(PN->getDebugLoc());
2061
2062 for (unsigned i = 0; i != NumPHIValues; ++i)
2063 NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
2064
2065 if (IdenticalUsers) {
2066 // Collect and deduplicate users up-front to avoid iterator invalidation.
2068 for (User *U : PN->users()) {
2070 if (User == &I)
2071 continue;
2072 ToReplace.insert(User);
2073 }
2074 for (Instruction *I : ToReplace) {
2075 replaceInstUsesWith(*I, NewPN);
2077 }
2078 OneUse = true;
2079 }
2080
2081 if (OneUse) {
2082 replaceAllDbgUsesWith(*PN, *NewPN, *PN, DT);
2083 }
2084 return replaceInstUsesWith(I, NewPN);
2085}
2086
2088 if (!BO.isAssociative())
2089 return nullptr;
2090
2091 // Find the interleaved binary ops.
2092 auto Opc = BO.getOpcode();
2093 auto *BO0 = dyn_cast<BinaryOperator>(BO.getOperand(0));
2094 auto *BO1 = dyn_cast<BinaryOperator>(BO.getOperand(1));
2095 if (!BO0 || !BO1 || !BO0->hasNUses(2) || !BO1->hasNUses(2) ||
2096 BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
2097 !BO0->isAssociative() || !BO1->isAssociative() ||
2098 BO0->getParent() != BO1->getParent())
2099 return nullptr;
2100
2101 assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
2102 "Expected commutative instructions!");
2103
2104 // Find the matching phis, forming the recurrences.
2105 PHINode *PN0, *PN1;
2106 Value *Start0, *Step0, *Start1, *Step1;
2107 if (!matchSimpleRecurrence(BO0, PN0, Start0, Step0) || !PN0->hasOneUse() ||
2108 !matchSimpleRecurrence(BO1, PN1, Start1, Step1) || !PN1->hasOneUse() ||
2109 PN0->getParent() != PN1->getParent())
2110 return nullptr;
2111
2112 assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
2113 "Expected PHIs with two incoming values!");
2114
2115 // Convert the start and step values to constants.
2116 auto *Init0 = dyn_cast<Constant>(Start0);
2117 auto *Init1 = dyn_cast<Constant>(Start1);
2118 auto *C0 = dyn_cast<Constant>(Step0);
2119 auto *C1 = dyn_cast<Constant>(Step1);
2120 if (!Init0 || !Init1 || !C0 || !C1)
2121 return nullptr;
2122
2123 // Fold the recurrence constants.
2124 auto *Init = ConstantFoldBinaryInstruction(Opc, Init0, Init1);
2125 auto *C = ConstantFoldBinaryInstruction(Opc, C0, C1);
2126 if (!Init || !C)
2127 return nullptr;
2128
2129 // Create the reduced PHI.
2130 auto *NewPN = PHINode::Create(PN0->getType(), PN0->getNumIncomingValues(),
2131 "reduced.phi");
2132
2133 // Create the new binary op.
2134 auto *NewBO = BinaryOperator::Create(Opc, NewPN, C);
2135 if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
2136 // Intersect FMF flags for FADD and FMUL.
2137 FastMathFlags Intersect = BO0->getFastMathFlags() &
2138 BO1->getFastMathFlags() & BO.getFastMathFlags();
2139 NewBO->setFastMathFlags(Intersect);
2140 } else {
2141 OverflowTracking Flags;
2142 Flags.AllKnownNonNegative = false;
2143 Flags.AllKnownNonZero = false;
2144 Flags.mergeFlags(*BO0);
2145 Flags.mergeFlags(*BO1);
2146 Flags.mergeFlags(BO);
2147 Flags.applyFlags(*NewBO);
2148 }
2149 NewBO->takeName(&BO);
2150
2151 for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
2152 auto *V = PN0->getIncomingValue(I);
2153 auto *BB = PN0->getIncomingBlock(I);
2154 if (V == Init0) {
2155 assert(((PN1->getIncomingValue(0) == Init1 &&
2156 PN1->getIncomingBlock(0) == BB) ||
2157 (PN1->getIncomingValue(1) == Init1 &&
2158 PN1->getIncomingBlock(1) == BB)) &&
2159 "Invalid incoming block!");
2160 NewPN->addIncoming(Init, BB);
2161 } else if (V == BO0) {
2162 assert(((PN1->getIncomingValue(0) == BO1 &&
2163 PN1->getIncomingBlock(0) == BB) ||
2164 (PN1->getIncomingValue(1) == BO1 &&
2165 PN1->getIncomingBlock(1) == BB)) &&
2166 "Invalid incoming block!");
2167 NewPN->addIncoming(NewBO, BB);
2168 } else
2169 llvm_unreachable("Unexpected incoming value!");
2170 }
2171
2172 LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0
2173 << "\n with " << *PN1 << "\n " << *BO1
2174 << '\n');
2175
2176 // Insert the new recurrence and remove the old (dead) ones.
2177 InsertNewInstWith(NewPN, PN0->getIterator());
2178 InsertNewInstWith(NewBO, BO0->getIterator());
2179
2186
2187 return replaceInstUsesWith(BO, NewBO);
2188}
2189
2191 // Attempt to fold binary operators whose operands are simple recurrences.
2192 if (auto *NewBO = foldBinopWithRecurrence(BO))
2193 return NewBO;
2194
2195 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
2196 // we are guarding against replicating the binop in >1 predecessor.
2197 // This could miss matching a phi with 2 constant incoming values.
2198 auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
2199 auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
2200 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
2201 Phi0->getNumOperands() != Phi1->getNumOperands())
2202 return nullptr;
2203
2204 // TODO: Remove the restriction for binop being in the same block as the phis.
2205 if (BO.getParent() != Phi0->getParent() ||
2206 BO.getParent() != Phi1->getParent())
2207 return nullptr;
2208
2209 // Fold if there is at least one specific constant value in phi0 or phi1's
2210 // incoming values that comes from the same block and this specific constant
2211 // value can be used to do optimization for specific binary operator.
2212 // For example:
2213 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
2214 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
2215 // %add = add i32 %phi0, %phi1
2216 // ==>
2217 // %add = phi i32 [%j, %bb0], [%i, %bb1]
2219 /*AllowRHSConstant*/ false);
2220 if (C) {
2221 SmallVector<Value *, 4> NewIncomingValues;
2222 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
2223 auto &Phi0Use = std::get<0>(T);
2224 auto &Phi1Use = std::get<1>(T);
2225 if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
2226 return false;
2227 Value *Phi0UseV = Phi0Use.get();
2228 Value *Phi1UseV = Phi1Use.get();
2229 if (Phi0UseV == C)
2230 NewIncomingValues.push_back(Phi1UseV);
2231 else if (Phi1UseV == C)
2232 NewIncomingValues.push_back(Phi0UseV);
2233 else
2234 return false;
2235 return true;
2236 };
2237
2238 if (all_of(zip(Phi0->operands(), Phi1->operands()),
2239 CanFoldIncomingValuePair)) {
2240 PHINode *NewPhi =
2241 PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
2242 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
2243 "The number of collected incoming values should equal the number "
2244 "of the original PHINode operands!");
2245 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
2246 NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
2247 return NewPhi;
2248 }
2249 }
2250
2251 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
2252 return nullptr;
2253
2254 // Match a pair of incoming constants for one of the predecessor blocks.
2255 BasicBlock *ConstBB, *OtherBB;
2256 Constant *C0, *C1;
2257 if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
2258 ConstBB = Phi0->getIncomingBlock(0);
2259 OtherBB = Phi0->getIncomingBlock(1);
2260 } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
2261 ConstBB = Phi0->getIncomingBlock(1);
2262 OtherBB = Phi0->getIncomingBlock(0);
2263 } else {
2264 return nullptr;
2265 }
2266 if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
2267 return nullptr;
2268
2269 // The block that we are hoisting to must reach here unconditionally.
2270 // Otherwise, we could be speculatively executing an expensive or
2271 // non-speculative op.
2272 auto *PredBlockBranch = dyn_cast<UncondBrInst>(OtherBB->getTerminator());
2273 if (!PredBlockBranch || !DT.isReachableFromEntry(OtherBB))
2274 return nullptr;
2275
2276 // TODO: This check could be tightened to only apply to binops (div/rem) that
2277 // are not safe to speculatively execute. But that could allow hoisting
2278 // potentially expensive instructions (fdiv for example).
2279 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
2281 return nullptr;
2282
2283 // Fold constants for the predecessor block with constant incoming values.
2284 Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
2285 if (!NewC)
2286 return nullptr;
2287
2288 // Make a new binop in the predecessor block with the non-constant incoming
2289 // values.
2290 Builder.SetInsertPoint(PredBlockBranch);
2291 Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
2292 Phi0->getIncomingValueForBlock(OtherBB),
2293 Phi1->getIncomingValueForBlock(OtherBB));
2294 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
2295 NotFoldedNewBO->copyIRFlags(&BO);
2296
2297 // Replace the binop with a phi of the new values. The old phis are dead.
2298 PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
2299 NewPhi->addIncoming(NewBO, OtherBB);
2300 NewPhi->addIncoming(NewC, ConstBB);
2301 return NewPhi;
2302}
2303
2305 auto TryFoldOperand = [&](unsigned OpIdx,
2306 bool IsOtherParamConst) -> Instruction * {
2307 if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(OpIdx)))
2308 return FoldOpIntoSelect(I, Sel, false, !IsOtherParamConst);
2309 if (auto *PN = dyn_cast<PHINode>(I.getOperand(OpIdx)))
2310 return foldOpIntoPhi(I, PN);
2311 return nullptr;
2312 };
2313
2314 if (Instruction *NewI =
2315 TryFoldOperand(/*OpIdx=*/0, isa<Constant>(I.getOperand(1))))
2316 return NewI;
2317 return TryFoldOperand(/*OpIdx=*/1, isa<Constant>(I.getOperand(0)));
2318}
2319
2321 // If this GEP has only 0 indices, it is the same pointer as
2322 // Src. If Src is not a trivial GEP too, don't combine
2323 // the indices.
2324 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2325 !Src.hasOneUse())
2326 return false;
2327 return true;
2328}
2329
2330/// Find a constant NewC that has property:
2331/// shuffle(NewC, ShMask) = C
2332/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
2333///
2334/// A 1-to-1 mapping is not required. Example:
2335/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
2337 VectorType *NewCTy) {
2338 if (isa<ScalableVectorType>(NewCTy)) {
2339 Constant *Splat = C->getSplatValue();
2340 if (!Splat)
2341 return nullptr;
2343 }
2344
2345 if (cast<FixedVectorType>(NewCTy)->getNumElements() >
2346 cast<FixedVectorType>(C->getType())->getNumElements())
2347 return nullptr;
2348
2349 unsigned NewCNumElts = cast<FixedVectorType>(NewCTy)->getNumElements();
2350 PoisonValue *PoisonScalar = PoisonValue::get(C->getType()->getScalarType());
2351 SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
2352 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
2353 for (unsigned I = 0; I < NumElts; ++I) {
2354 Constant *CElt = C->getAggregateElement(I);
2355 if (ShMask[I] >= 0) {
2356 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2357 Constant *NewCElt = NewVecC[ShMask[I]];
2358 // Bail out if:
2359 // 1. The constant vector contains a constant expression.
2360 // 2. The shuffle needs an element of the constant vector that can't
2361 // be mapped to a new constant vector.
2362 // 3. This is a widening shuffle that copies elements of V1 into the
2363 // extended elements (extending with poison is allowed).
2364 if (!CElt || (!isa<PoisonValue>(NewCElt) && NewCElt != CElt) ||
2365 I >= NewCNumElts)
2366 return nullptr;
2367 NewVecC[ShMask[I]] = CElt;
2368 }
2369 }
2370 return ConstantVector::get(NewVecC);
2371}
2372
2373// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
2375 Constant *Splat, bool SplatLHS,
2376 const DataLayout &DL) {
2377 ElementCount EC = cast<VectorType>(Vector->getType())->getElementCount();
2379 Constant *RHS = Vector;
2380 if (!SplatLHS)
2381 std::swap(LHS, RHS);
2382 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
2383}
2384
2385template <Intrinsic::ID SpliceID>
2387 InstCombiner::BuilderTy &Builder) {
2388 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
2389 auto CreateBinOpSplice = [&](Value *X, Value *Y, Value *Offset) {
2390 Value *V = Builder.CreateBinOp(Inst.getOpcode(), X, Y, Inst.getName());
2391 if (auto *BO = dyn_cast<BinaryOperator>(V))
2392 BO->copyIRFlags(&Inst);
2393 Module *M = Inst.getModule();
2394 Function *F = Intrinsic::getOrInsertDeclaration(M, SpliceID, V->getType());
2395 return CallInst::Create(F, {V, PoisonValue::get(V->getType()), Offset});
2396 };
2397 Value *V1, *V2, *Offset;
2398 if (match(LHS,
2400 // Op(splice(V1, poison, offset), splice(V2, poison, offset))
2401 // -> splice(Op(V1, V2), poison, offset)
2403 m_Specific(Offset))) &&
2404 (LHS->hasOneUse() || RHS->hasOneUse() ||
2405 (LHS == RHS && LHS->hasNUses(2))))
2406 return CreateBinOpSplice(V1, V2, Offset);
2407
2408 // Op(splice(V1, poison, offset), RHSSplat)
2409 // -> splice(Op(V1, RHSSplat), poison, offset)
2410 if (LHS->hasOneUse() && isSplatValue(RHS))
2411 return CreateBinOpSplice(V1, RHS, Offset);
2412 }
2413 // Op(LHSSplat, splice(V2, poison, offset))
2414 // -> splice(Op(LHSSplat, V2), poison, offset)
2415 else if (isSplatValue(LHS) &&
2417 m_Value(Offset)))))
2418 return CreateBinOpSplice(LHS, V2, Offset);
2419
2420 // TODO: Fold binops of the form
2421 // Op(splice(poison, V1, offset), splice(poison, V2, offset))
2422 // -> splice(poison, Op(V1, V2), offset)
2423
2424 return nullptr;
2425}
2426
2428 if (!isa<VectorType>(Inst.getType()))
2429 return nullptr;
2430
2431 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2432 Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
2433 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2434 cast<VectorType>(Inst.getType())->getElementCount());
2435 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2436 cast<VectorType>(Inst.getType())->getElementCount());
2437
2438 auto foldConstantsThroughSubVectorInsertSplat =
2439 [&](Value *MaybeSubVector, Value *MaybeSplat,
2440 bool SplatLHS) -> Instruction * {
2441 Value *Idx;
2442 Constant *Splat, *SubVector, *Dest;
2443 if (!match(MaybeSplat, m_ConstantSplat(m_Constant(Splat))) ||
2444 !match(MaybeSubVector,
2445 m_VectorInsert(m_Constant(Dest), m_Constant(SubVector),
2446 m_Value(Idx))))
2447 return nullptr;
2448 SubVector =
2449 constantFoldBinOpWithSplat(Opcode, SubVector, Splat, SplatLHS, DL);
2450 Dest = constantFoldBinOpWithSplat(Opcode, Dest, Splat, SplatLHS, DL);
2451 if (!SubVector || !Dest)
2452 return nullptr;
2453 auto *InsertVector =
2454 Builder.CreateInsertVector(Dest->getType(), Dest, SubVector, Idx);
2455 return replaceInstUsesWith(Inst, InsertVector);
2456 };
2457
2458 // If one operand is a constant splat and the other operand is a
2459 // `vector.insert` where both the destination and subvector are constant,
2460 // apply the operation to both the destination and subvector, returning a new
2461 // constant `vector.insert`. This helps constant folding for scalable vectors.
2462 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2463 /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
2464 return Folded;
2465 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2466 /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
2467 return Folded;
2468
2469 // If both operands of the binop are vector concatenations, then perform the
2470 // narrow binop on each pair of the source operands followed by concatenation
2471 // of the results.
2472 Value *L0, *L1, *R0, *R1;
2473 ArrayRef<int> Mask;
2474 if (match(LHS, m_Shuffle(m_Value(L0), m_Value(L1), m_Mask(Mask))) &&
2475 match(RHS, m_Shuffle(m_Value(R0), m_Value(R1), m_SpecificMask(Mask))) &&
2476 LHS->hasOneUse() && RHS->hasOneUse() &&
2477 cast<ShuffleVectorInst>(LHS)->isConcat() &&
2478 cast<ShuffleVectorInst>(RHS)->isConcat()) {
2479 // This transform does not have the speculative execution constraint as
2480 // below because the shuffle is a concatenation. The new binops are
2481 // operating on exactly the same elements as the existing binop.
2482 // TODO: We could ease the mask requirement to allow different undef lanes,
2483 // but that requires an analysis of the binop-with-undef output value.
2484 Value *NewBO0 = Builder.CreateBinOp(Opcode, L0, R0);
2485 if (auto *BO = dyn_cast<BinaryOperator>(NewBO0))
2486 BO->copyIRFlags(&Inst);
2487 Value *NewBO1 = Builder.CreateBinOp(Opcode, L1, R1);
2488 if (auto *BO = dyn_cast<BinaryOperator>(NewBO1))
2489 BO->copyIRFlags(&Inst);
2490 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2491 }
2492
2493 auto createBinOpReverse = [&](Value *X, Value *Y) {
2494 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2495 if (auto *BO = dyn_cast<BinaryOperator>(V))
2496 BO->copyIRFlags(&Inst);
2497 Module *M = Inst.getModule();
2499 M, Intrinsic::vector_reverse, V->getType());
2500 return CallInst::Create(F, V);
2501 };
2502
2503 // NOTE: Reverse shuffles don't require the speculative execution protection
2504 // below because they don't affect which lanes take part in the computation.
2505
2506 Value *V1, *V2;
2507 if (match(LHS, m_VecReverse(m_Value(V1)))) {
2508 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2509 if (match(RHS, m_VecReverse(m_Value(V2))) &&
2510 (LHS->hasOneUse() || RHS->hasOneUse() ||
2511 (LHS == RHS && LHS->hasNUses(2))))
2512 return createBinOpReverse(V1, V2);
2513
2514 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2515 if (LHS->hasOneUse() && isSplatValue(RHS))
2516 return createBinOpReverse(V1, RHS);
2517 }
2518 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2519 else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
2520 return createBinOpReverse(LHS, V2);
2521
2522 auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
2523 Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
2524 if (auto *BO = dyn_cast<BinaryOperator>(V))
2525 BO->copyIRFlags(&Inst);
2526
2527 ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
2528 Value *AllTrueMask = Builder.CreateVectorSplat(EC, Builder.getTrue());
2529 Module *M = Inst.getModule();
2531 M, Intrinsic::experimental_vp_reverse, V->getType());
2532 return CallInst::Create(F, {V, AllTrueMask, EVL});
2533 };
2534
2535 Value *EVL;
2537 m_Value(V1), m_AllOnes(), m_Value(EVL)))) {
2538 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2540 m_Value(V2), m_AllOnes(), m_Specific(EVL))) &&
2541 (LHS->hasOneUse() || RHS->hasOneUse() ||
2542 (LHS == RHS && LHS->hasNUses(2))))
2543 return createBinOpVPReverse(V1, V2, EVL);
2544
2545 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2546 if (LHS->hasOneUse() && isSplatValue(RHS))
2547 return createBinOpVPReverse(V1, RHS, EVL);
2548 }
2549 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2550 else if (isSplatValue(LHS) &&
2552 m_Value(V2), m_AllOnes(), m_Value(EVL))))
2553 return createBinOpVPReverse(LHS, V2, EVL);
2554
2555 if (Instruction *Folded =
2557 return Folded;
2558 if (Instruction *Folded =
2560 return Folded;
2561
2562 // It may not be safe to reorder shuffles and things like div, urem, etc.
2563 // because we may trap when executing those ops on unknown vector elements.
2564 // See PR20059.
2566 return nullptr;
2567
2568 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2569 Value *XY = Builder.CreateBinOp(Opcode, X, Y);
2570 if (auto *BO = dyn_cast<BinaryOperator>(XY))
2571 BO->copyIRFlags(&Inst);
2572 return new ShuffleVectorInst(XY, M);
2573 };
2574
2575 // If both arguments of the binary operation are shuffles that use the same
2576 // mask and shuffle within a single vector, move the shuffle after the binop.
2577 if (match(LHS, m_Shuffle(m_Value(V1), m_Poison(), m_Mask(Mask))) &&
2578 match(RHS, m_Shuffle(m_Value(V2), m_Poison(), m_SpecificMask(Mask))) &&
2579 V1->getType() == V2->getType() &&
2580 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2581 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2582 return createBinOpShuffle(V1, V2, Mask);
2583 }
2584
2585 // If both arguments of a commutative binop are select-shuffles that use the
2586 // same mask with commuted operands, the shuffles are unnecessary.
2587 if (Inst.isCommutative() &&
2588 match(LHS, m_Shuffle(m_Value(V1), m_Value(V2), m_Mask(Mask))) &&
2589 match(RHS,
2590 m_Shuffle(m_Specific(V2), m_Specific(V1), m_SpecificMask(Mask)))) {
2591 auto *LShuf = cast<ShuffleVectorInst>(LHS);
2592 auto *RShuf = cast<ShuffleVectorInst>(RHS);
2593 // TODO: Allow shuffles that contain undefs in the mask?
2594 // That is legal, but it reduces undef knowledge.
2595 // TODO: Allow arbitrary shuffles by shuffling after binop?
2596 // That might be legal, but we have to deal with poison.
2597 if (LShuf->isSelect() &&
2598 !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
2599 RShuf->isSelect() &&
2600 !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
2601 // Example:
2602 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2603 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2604 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2605 Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
2606 NewBO->copyIRFlags(&Inst);
2607 return NewBO;
2608 }
2609 }
2610
2611 // If one argument is a shuffle within one vector and the other is a constant,
2612 // try moving the shuffle after the binary operation. This canonicalization
2613 // intends to move shuffles closer to other shuffles and binops closer to
2614 // other binops, so they can be folded. It may also enable demanded elements
2615 // transforms.
2616 Constant *C;
2618 m_Mask(Mask))),
2619 m_ImmConstant(C)))) {
2620 assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
2621 "Shuffle should not change scalar type");
2622
2623 bool ConstOp1 = isa<Constant>(RHS);
2624 if (Constant *NewC =
2626 // For fixed vectors, lanes of NewC not used by the shuffle will be poison
2627 // which will cause UB for div/rem. Mask them with a safe constant.
2628 if (isa<FixedVectorType>(V1->getType()) && Inst.isIntDivRem())
2629 NewC = getSafeVectorConstantForBinop(Opcode, NewC, ConstOp1);
2630
2631 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2632 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2633 Value *NewLHS = ConstOp1 ? V1 : NewC;
2634 Value *NewRHS = ConstOp1 ? NewC : V1;
2635 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2636 }
2637 }
2638
2639 // Try to reassociate to sink a splat shuffle after a binary operation.
2640 if (Inst.isAssociative() && Inst.isCommutative()) {
2641 // Canonicalize shuffle operand as LHS.
2642 if (isa<ShuffleVectorInst>(RHS))
2643 std::swap(LHS, RHS);
2644
2645 Value *X;
2646 ArrayRef<int> MaskC;
2647 int SplatIndex;
2648 Value *Y, *OtherOp;
2649 if (!match(LHS,
2650 m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
2651 !match(MaskC, m_SplatOrPoisonMask(SplatIndex)) ||
2652 X->getType() != Inst.getType() ||
2653 !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
2654 return nullptr;
2655
2656 // FIXME: This may not be safe if the analysis allows undef elements. By
2657 // moving 'Y' before the splat shuffle, we are implicitly assuming
2658 // that it is not undef/poison at the splat index.
2659 if (isSplatValue(OtherOp, SplatIndex)) {
2660 std::swap(Y, OtherOp);
2661 } else if (!isSplatValue(Y, SplatIndex)) {
2662 return nullptr;
2663 }
2664
2665 // X and Y are splatted values, so perform the binary operation on those
2666 // values followed by a splat followed by the 2nd binary operation:
2667 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2668 Value *NewBO = Builder.CreateBinOp(Opcode, X, Y);
2669 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2670 Value *NewSplat = Builder.CreateShuffleVector(NewBO, NewMask);
2671 Instruction *R = BinaryOperator::Create(Opcode, NewSplat, OtherOp);
2672
2673 // Intersect FMF on both new binops. Other (poison-generating) flags are
2674 // dropped to be safe.
2675 if (isa<FPMathOperator>(R)) {
2676 R->copyFastMathFlags(&Inst);
2677 R->andIRFlags(RHS);
2678 }
2679 if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
2680 NewInstBO->copyIRFlags(R);
2681 return R;
2682 }
2683
2684 return nullptr;
2685}
2686
2687/// Try to narrow the width of a binop if at least 1 operand is an extend of
2688/// of a value. This requires a potentially expensive known bits check to make
2689/// sure the narrow op does not overflow.
2690Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2691 // We need at least one extended operand.
2692 Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
2693
2694 // If this is a sub, we swap the operands since we always want an extension
2695 // on the RHS. The LHS can be an extension or a constant.
2696 if (BO.getOpcode() == Instruction::Sub)
2697 std::swap(Op0, Op1);
2698
2699 Value *X;
2700 bool IsSext = match(Op0, m_SExt(m_Value(X)));
2701 if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
2702 return nullptr;
2703
2704 // If both operands are the same extension from the same source type and we
2705 // can eliminate at least one (hasOneUse), this might work.
2706 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2707 Value *Y;
2708 if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
2709 cast<Operator>(Op1)->getOpcode() == CastOpc &&
2710 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2711 // If that did not match, see if we have a suitable constant operand.
2712 // Truncating and extending must produce the same constant.
2713 Constant *WideC;
2714 if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
2715 return nullptr;
2716 Constant *NarrowC = getLosslessInvCast(WideC, X->getType(), CastOpc, DL);
2717 if (!NarrowC)
2718 return nullptr;
2719 Y = NarrowC;
2720 }
2721
2722 // Swap back now that we found our operands.
2723 if (BO.getOpcode() == Instruction::Sub)
2724 std::swap(X, Y);
2725
2726 // Both operands have narrow versions. Last step: the math must not overflow
2727 // in the narrow width.
2728 if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
2729 return nullptr;
2730
2731 // bo (ext X), (ext Y) --> ext (bo X, Y)
2732 // bo (ext X), C --> ext (bo X, C')
2733 Value *NarrowBO = Builder.CreateBinOp(BO.getOpcode(), X, Y, "narrow");
2734 if (auto *NewBinOp = dyn_cast<BinaryOperator>(NarrowBO)) {
2735 if (IsSext)
2736 NewBinOp->setHasNoSignedWrap();
2737 else
2738 NewBinOp->setHasNoUnsignedWrap();
2739 }
2740 return CastInst::Create(CastOpc, NarrowBO, BO.getType());
2741}
2742
2743/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2744/// transform.
2749
2750/// Thread a GEP operation with constant indices through the constant true/false
2751/// arms of a select.
// NOTE(review): the first line of the signature (original line 2752) is elided
// in this dump; presumably: static Instruction *foldSelectGEP(GetElementPtrInst &GEP, ...).
2753 InstCombiner::BuilderTy &Builder) {
// Require all-constant indices so the two GEPs created below fold to
// constants rather than adding instructions to either select arm.
2754 if (!GEP.hasAllConstantIndices())
2755 return nullptr;
2756
2757 Instruction *Sel;
2758 Value *Cond;
2759 Constant *TrueC, *FalseC;
// The pointer operand must be a select instruction whose both arms are
// constants; otherwise the transform cannot produce a constant per arm.
2760 if (!match(GEP.getPointerOperand(), m_Instruction(Sel)) ||
2761 !match(Sel,
2762 m_Select(m_Value(Cond), m_Constant(TrueC), m_Constant(FalseC))))
2763 return nullptr;
2764
2765 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2766 // Propagate 'inbounds' and metadata from existing instructions.
2767 // Note: using IRBuilder to create the constants for efficiency.
2768 SmallVector<Value *, 4> IndexC(GEP.indices());
// Reuse the original GEP's nowrap flags on both folded arms.
2769 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2770 Type *Ty = GEP.getSourceElementType();
2771 Value *NewTrueC = Builder.CreateGEP(Ty, TrueC, IndexC, "", NW);
2772 Value *NewFalseC = Builder.CreateGEP(Ty, FalseC, IndexC, "", NW);
// The trailing 'Sel' argument presumably copies metadata from the original
// select — confirm against SelectInst::Create.
2773 return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
2774}
2775
2776// Canonicalization:
2777// gep T, (gep i8, base, C1), (Index + C2) into
2778// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
// NOTE(review): the first signature line (original 2779) is elided in this dump.
2780 GEPOperator *Src,
2781 InstCombinerImpl &IC) {
// Only single-index outer GEPs are handled.
2782 if (GEP.getNumIndices() != 1)
2783 return nullptr;
2784 auto &DL = IC.getDataLayout();
2785 Value *Base;
2786 const APInt *C1;
// The inner GEP must be a ptradd of a constant byte offset C1.
2787 if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1))))
2788 return nullptr;
2789 Value *VarIndex;
2790 const APInt *C2;
2791 Type *PtrTy = Src->getType()->getScalarType();
2792 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
// The outer index must be (VarIndex + C2) (or a disjoint-or equivalent).
2793 if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2))))
2794 return nullptr;
// Both constants must already be index-width; no implicit extension here.
2795 if (C1->getBitWidth() != IndexSizeInBits ||
2796 C2->getBitWidth() != IndexSizeInBits)
2797 return nullptr;
2798 Type *BaseType = GEP.getSourceElementType();
// NOTE(review): the guard condition on this early-out (original line 2799)
// is elided in this dump.
2800 return nullptr;
// Fold the two constants into a single byte offset: C1 + C2 * sizeof(T).
2801 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2802 APInt NewOffset = TypeSize * *C2 + *C1;
// Profitable only if the folded offset vanishes, or the intermediate values
// die (one-use) so no extra instructions remain live.
2803 if (NewOffset.isZero() ||
2804 (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
// NOTE(review): the Flags initialization (original line 2805) is elided here.
2806 if (GEP.hasNoUnsignedWrap() &&
2807 cast<GEPOperator>(Src)->hasNoUnsignedWrap() &&
2808 match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()))) {
// NOTE(review): a Flags update (original line 2809) is elided here.
2810 if (GEP.isInBounds() && cast<GEPOperator>(Src)->isInBounds())
2811 Flags |= GEPNoWrapFlags::inBounds();
2812 }
2813
2814 Value *GEPConst =
2815 IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset), "", Flags);
2816 return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex, Flags);
2817 }
2818
2819 return nullptr;
2820}
2821
2822/// Combine constant offsets separated by variable offsets.
2823/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
// NOTE(review): the first signature line (original 2824) is elided in this dump.
2825 InstCombinerImpl &IC) {
// The outermost GEP must contribute a pure constant offset.
2826 if (!GEP.hasAllConstantIndices())
2827 return nullptr;
2828
// NOTE(review): the declarations of NW (nowrap-flag accumulator) and Skipped
// (the list of variable-offset GEPs walked through) — original lines
// 2829-2830 — are elided in this dump.
2831 auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
// Walk inward through variable-offset GEPs, intersecting nowrap flags,
// until another all-constant-offset GEP is found.
2832 while (true) {
2833 if (!InnerGEP)
2834 return nullptr;
2835
2836 NW = NW.intersectForReassociate(InnerGEP->getNoWrapFlags());
2837 if (InnerGEP->hasAllConstantIndices())
2838 break;
2839
// Each skipped GEP will be rewritten, so it must have no other users.
2840 if (!InnerGEP->hasOneUse())
2841 return nullptr;
2842
2843 Skipped.push_back(InnerGEP);
2844 InnerGEP = dyn_cast<GetElementPtrInst>(InnerGEP->getPointerOperand());
2845 }
2846
2847 // The two constant offset GEPs are directly adjacent: Let normal offset
2848 // merging handle it.
2849 if (Skipped.empty())
2850 return nullptr;
2851
2852 // FIXME: This one-use check is not strictly necessary. Consider relaxing it
2853 // if profitable.
2854 if (!InnerGEP->hasOneUse())
2855 return nullptr;
2856
2857 // Don't bother with vector splats.
2858 Type *Ty = GEP.getType();
2859 if (InnerGEP->getType() != Ty)
2860 return nullptr;
2861
// Accumulate both constant offsets into one APInt of index width.
2862 const DataLayout &DL = IC.getDataLayout();
2863 APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2864 if (!GEP.accumulateConstantOffset(DL, Offset) ||
2865 !InnerGEP->accumulateConstantOffset(DL, Offset))
2866 return nullptr;
2867
// Splice out the inner constant GEP: point the innermost skipped GEP at its
// base, clamp flags on every rewritten link, and re-apply the merged
// constant offset outside the variable-offset chain.
2868 IC.replaceOperand(*Skipped.back(), 0, InnerGEP->getPointerOperand());
2869 for (GetElementPtrInst *SkippedGEP : Skipped)
2870 SkippedGEP->setNoWrapFlags(NW);
2871
2872 return IC.replaceInstUsesWith(
2873 GEP,
2874 IC.Builder.CreatePtrAdd(Skipped.front(), IC.Builder.getInt(Offset), "",
2875 NW.intersectForOffsetAdd(GEP.getNoWrapFlags())));
2876}
2877
// NOTE(review): the first signature line (original 2878) is elided in this
// dump; presumably: Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, ...).
2879 GEPOperator *Src) {
2880 // Combine Indices - If the source pointer to this getelementptr instruction
2881 // is a getelementptr instruction with matching element type, combine the
2882 // indices of the two getelementptr instructions into a single instruction.
2883 if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
2884 return nullptr;
2885
// Try the specialized gep-of-const-gep-i8 canonicalization first.
2886 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2887 return I;
2888
// Then try hoisting constant offsets past variable-offset GEPs.
2889 if (auto *I = combineConstantOffsets(GEP, *this))
2890 return I;
2891
// The remaining folds require matching element types between the two GEPs.
2892 if (Src->getResultElementType() != GEP.getSourceElementType())
2893 return nullptr;
2894
2895 // Fold chained GEP with constant base into single GEP:
2896 // gep i8, (gep i8, %base, C1), (select Cond, C2, C3)
2897 // -> gep i8, %base, (select Cond, C1+C2, C1+C3)
2898 if (Src->hasOneUse() && GEP.getNumIndices() == 1 &&
2899 Src->getNumIndices() == 1) {
2900 Value *SrcIdx = *Src->idx_begin();
2901 Value *GEPIdx = *GEP.idx_begin();
2902 const APInt *ConstOffset, *TrueVal, *FalseVal;
2903 Value *Cond;
2904
// Either GEP's index may be the constant and the other the select.
2905 if ((match(SrcIdx, m_APInt(ConstOffset)) &&
2906 match(GEPIdx,
2907 m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal)))) ||
2908 (match(GEPIdx, m_APInt(ConstOffset)) &&
2909 match(SrcIdx,
2910 m_Select(m_Value(Cond), m_APInt(TrueVal), m_APInt(FalseVal))))) {
2911 auto *Select = isa<SelectInst>(GEPIdx) ? cast<SelectInst>(GEPIdx)
2912 : cast<SelectInst>(SrcIdx);
2913
2914 // Make sure the select has only one use.
2915 if (!Select->hasOneUse())
2916 return nullptr;
2917
// Guard the APInt additions below against mixed bit widths.
2918 if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() ||
2919 FalseVal->getBitWidth() != ConstOffset->getBitWidth())
2920 return nullptr;
2921
2922 APInt NewTrueVal = *ConstOffset + *TrueVal;
2923 APInt NewFalseVal = *ConstOffset + *FalseVal;
2924 Constant *NewTrue = ConstantInt::get(Select->getType(), NewTrueVal);
2925 Constant *NewFalse = ConstantInt::get(Select->getType(), NewFalseVal);
2926 Value *NewSelect = Builder.CreateSelect(
2927 Cond, NewTrue, NewFalse, /*Name=*/"",
2928 /*MDFrom=*/(ProfcheckDisableMetadataFixes ? nullptr : Select));
// NOTE(review): the initializer of Flags (original line 2930) is elided in
// this dump; presumably it merges the nowrap flags of Src and GEP.
2929 GEPNoWrapFlags Flags =
2931 return replaceInstUsesWith(GEP,
2932 Builder.CreateGEP(GEP.getResultElementType(),
2933 Src->getPointerOperand(),
2934 NewSelect, "", Flags));
2935 }
2936 }
2937
2938 // Find out whether the last index in the source GEP is a sequential idx.
2939 bool EndsWithSequential = false;
2940 for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
2941 I != E; ++I)
2942 EndsWithSequential = I.isSequential();
2943 if (!EndsWithSequential)
2944 return nullptr;
2945
2946 // Replace: gep (gep %P, long B), long A, ...
2947 // With: T = long A+B; gep %P, T, ...
2948 Value *SO1 = Src->getOperand(Src->getNumOperands() - 1);
2949 Value *GO1 = GEP.getOperand(1);
2950
2951 // If they aren't the same type, then the input hasn't been processed
2952 // by the loop above yet (which canonicalizes sequential index types to
2953 // intptr_t). Just avoid transforming this until the input has been
2954 // normalized.
2955 if (SO1->getType() != GO1->getType())
2956 return nullptr;
2957
// Only proceed when the index sum simplifies to an existing value,
// guaranteeing the merge does not add an instruction.
2958 Value *Sum =
2959 simplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP));
2960 // Only do the combine when we are sure the cost after the
2961 // merge is never more than that before the merge.
2962 if (Sum == nullptr)
2963 return nullptr;
2964
// NOTE(review): the declaration of Indices (original line 2965) is elided in
// this dump. The merged index list is: Src's leading indices, the summed
// index, then GEP's trailing indices.
2966 Indices.append(Src->op_begin() + 1, Src->op_end() - 1);
2967 Indices.push_back(Sum);
2968 Indices.append(GEP.op_begin() + 2, GEP.op_end());
2969
2970 // Don't create GEPs with more than one non-zero index.
2971 unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
2972 auto *C = dyn_cast<Constant>(Idx);
2973 return !C || !C->isNullValue();
2974 });
2975 if (NumNonZeroIndices > 1)
2976 return nullptr;
2977
// NOTE(review): the final flags argument (original line 2981) is elided in
// this dump.
2978 return replaceInstUsesWith(
2979 GEP, Builder.CreateGEP(
2980 Src->getSourceElementType(), Src->getOperand(0), Indices, "",
2982}
2983
// NOTE(review): the signature lines (original 2984-2985) are elided in this
// dump; presumably: Value *InstCombiner::getFreelyInvertedImpl(Value *V,
// bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth).
// Returns an inverted value (~V) when V is freely invertible, the NonNull
// sentinel when invertible but no Builder was supplied, or nullptr otherwise.
2986 bool &DoesConsume, unsigned Depth) {
// Sentinel non-null pointer: signals "invertible" without materializing IR
// when the caller passed no Builder. Must never be dereferenced.
2987 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2988 // ~(~(X)) -> X.
2989 Value *A, *B;
2990 if (match(V, m_Not(m_Value(A)))) {
// Using the operand of an existing 'not' consumes that instruction.
2991 DoesConsume = true;
2992 return A;
2993 }
2994
2995 Constant *C;
2996 // Constants can be considered to be not'ed values.
2997 if (match(V, m_ImmConstant(C)))
2998 return ConstantExpr::getNot(C);
2999
// NOTE(review): the recursion-depth guard condition (original line 3000) is
// elided in this dump; it precedes this bail-out return.
3001 return nullptr;
3002
3003 // The rest of the cases require that we invert all uses so don't bother
3004 // doing the analysis if we know we can't use the result.
3005 if (!WillInvertAllUses)
3006 return nullptr;
3007
3008 // Compares can be inverted if all of their uses are being modified to use
3009 // the ~V.
3010 if (auto *I = dyn_cast<CmpInst>(V)) {
3011 if (Builder != nullptr)
3012 return Builder->CreateCmp(I->getInversePredicate(), I->getOperand(0),
3013 I->getOperand(1));
3014 return NonNull;
3015 }
3016
3017 // If `V` is of the form `A + B` then `-1 - V` can be folded into
3018 // `(-1 - B) - A` if we are willing to invert all of the uses.
3019 if (match(V, m_Add(m_Value(A), m_Value(B)))) {
3020 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3021 DoesConsume, Depth))
3022 return Builder ? Builder->CreateSub(BV, A) : NonNull;
3023 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3024 DoesConsume, Depth))
3025 return Builder ? Builder->CreateSub(AV, B) : NonNull;
3026 return nullptr;
3027 }
3028
3029 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
3030 // into `A ^ B` if we are willing to invert all of the uses.
3031 if (match(V, m_Xor(m_Value(A), m_Value(B)))) {
3032 if (auto *BV = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3033 DoesConsume, Depth))
3034 return Builder ? Builder->CreateXor(A, BV) : NonNull;
3035 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3036 DoesConsume, Depth))
3037 return Builder ? Builder->CreateXor(AV, B) : NonNull;
3038 return nullptr;
3039 }
3040
3041 // If `V` is of the form `B - A` then `-1 - V` can be folded into
3042 // `A + (-1 - B)` if we are willing to invert all of the uses.
3043 if (match(V, m_Sub(m_Value(A), m_Value(B)))) {
3044 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3045 DoesConsume, Depth))
3046 return Builder ? Builder->CreateAdd(AV, B) : NonNull;
3047 return nullptr;
3048 }
3049
3050 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
3051 // into `A s>> B` if we are willing to invert all of the uses.
3052 if (match(V, m_AShr(m_Value(A), m_Value(B)))) {
3053 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3054 DoesConsume, Depth))
3055 return Builder ? Builder->CreateAShr(AV, B) : NonNull;
3056 return nullptr;
3057 }
3058
3059 Value *Cond;
3060 // LogicOps are special in that we canonicalize them at the cost of an
3061 // instruction.
// NOTE(review): the second conjunct of IsSelect (original line 3063) is
// elided in this dump.
3062 bool IsSelect = match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
3064 // Selects/min/max with invertible operands are freely invertible
3065 if (IsSelect || match(V, m_MaxOrMin(m_Value(A), m_Value(B)))) {
// First probe B without a Builder so no IR is created unless both arms are
// known invertible.
3066 bool LocalDoesConsume = DoesConsume;
3067 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder*/ nullptr,
3068 LocalDoesConsume, Depth))
3069 return nullptr;
3070 if (Value *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3071 LocalDoesConsume, Depth)) {
3072 DoesConsume = LocalDoesConsume;
3073 if (Builder != nullptr) {
3074 Value *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3075 DoesConsume, Depth);
3076 assert(NotB != nullptr &&
3077 "Unable to build inverted value for known freely invertable op");
3078 if (auto *II = dyn_cast<IntrinsicInst>(V))
3079 return Builder->CreateBinaryIntrinsic(
3080 getInverseMinMaxIntrinsic(II->getIntrinsicID()), NotA, NotB);
// NOTE(review): the final (metadata-source) argument of this CreateSelect
// call (original line 3083) is elided in this dump.
3081 return Builder->CreateSelect(
3082 Cond, NotA, NotB, "",
3084 }
3085 return NonNull;
3086 }
3087 }
3088
3089 if (PHINode *PN = dyn_cast<PHINode>(V)) {
3090 bool LocalDoesConsume = DoesConsume;
// NOTE(review): the declaration of IncomingValues (original line 3091) is
// elided in this dump.
3092 for (Use &U : PN->operands()) {
3093 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
3094 Value *NewIncomingVal = getFreelyInvertedImpl(
3095 U.get(), /*WillInvertAllUses=*/false,
3096 /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
3097 if (NewIncomingVal == nullptr)
3098 return nullptr;
3099 // Make sure that we can safely erase the original PHI node.
3100 if (NewIncomingVal == V)
3101 return nullptr;
3102 if (Builder != nullptr)
3103 IncomingValues.emplace_back(NewIncomingVal, IncomingBlock);
3104 }
3105
3106 DoesConsume = LocalDoesConsume;
3107 if (Builder != nullptr) {
// NOTE(review): an insert-point guard declaration (original line 3108) is
// elided in this dump; it scopes the SetInsertPoint below.
3109 Builder->SetInsertPoint(PN);
3110 PHINode *NewPN =
3111 Builder->CreatePHI(PN->getType(), PN->getNumIncomingValues());
3112 for (auto [Val, Pred] : IncomingValues)
3113 NewPN->addIncoming(Val, Pred);
3114 return NewPN;
3115 }
3116 return NonNull;
3117 }
3118
// sext(A) is all-ones/all-zeros per lane, so inverting the narrow value and
// re-extending inverts the wide value.
3119 if (match(V, m_SExtLike(m_Value(A)))) {
3120 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3121 DoesConsume, Depth))
3122 return Builder ? Builder->CreateSExt(AV, V->getType()) : NonNull;
3123 return nullptr;
3124 }
3125
3126 if (match(V, m_Trunc(m_Value(A)))) {
3127 if (auto *AV = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3128 DoesConsume, Depth))
3129 return Builder ? Builder->CreateTrunc(AV, V->getType()) : NonNull;
3130 return nullptr;
3131 }
3132
3133 // De Morgan's Laws:
3134 // (~(A | B)) -> (~A & ~B)
3135 // (~(A & B)) -> (~A | ~B)
3136 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
3137 bool IsLogical, Value *A,
3138 Value *B) -> Value * {
// Same two-phase probe as the select case: check B builder-free first.
3139 bool LocalDoesConsume = DoesConsume;
3140 if (!getFreelyInvertedImpl(B, B->hasOneUse(), /*Builder=*/nullptr,
3141 LocalDoesConsume, Depth))
3142 return nullptr;
3143 if (auto *NotA = getFreelyInvertedImpl(A, A->hasOneUse(), Builder,
3144 LocalDoesConsume, Depth)) {
3145 auto *NotB = getFreelyInvertedImpl(B, B->hasOneUse(), Builder,
3146 LocalDoesConsume, Depth);
3147 DoesConsume = LocalDoesConsume;
3148 if (IsLogical)
3149 return Builder ? Builder->CreateLogicalOp(Opcode, NotA, NotB) : NonNull;
3150 return Builder ? Builder->CreateBinOp(Opcode, NotA, NotB) : NonNull;
3151 }
3152
3153 return nullptr;
3154 };
3155
3156 if (match(V, m_Or(m_Value(A), m_Value(B))))
3157 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
3158 B);
3159
3160 if (match(V, m_And(m_Value(A), m_Value(B))))
3161 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
3162 B);
3163
3164 if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
3165 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
3166 B);
3167
3168 if (match(V, m_LogicalAnd(m_Value(A), m_Value(B))))
3169 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
3170 B);
3171
3172 return nullptr;
3173}
3174
3175/// Return true if we should canonicalize the gep to an i8 ptradd.
// NOTE(review): the signature line (original 3176) is elided in this dump;
// presumably: static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
3177 Value *PtrOp = GEP.getOperand(0);
3178 Type *GEPEltType = GEP.getSourceElementType();
// Already in canonical i8 form: nothing to do.
3179 if (GEPEltType->isIntegerTy(8))
3180 return false;
3181
3182 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3183 // intrinsic. This has better support in BasicAA.
3184 if (GEPEltType->isScalableTy())
3185 return true;
3186
3187 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3188 // together.
// NOTE(review): the first matcher line of this condition (original 3191,
// presumably m_OneUse(m_CombineOr(m_Mul(...), ...))) is elided in this dump.
3189 if (GEP.getNumIndices() == 1 &&
3190 match(GEP.getOperand(1),
3192 m_Shl(m_Value(), m_ConstantInt()))))
3193 return true;
3194
3195 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3196 // possibly be merged together.
3197 auto PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
// Require an inner all-constant GEP and at least one non-zero constant index
// on the outer GEP, so the expansion exposes two mergeable constants.
3198 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3199 any_of(GEP.indices(), [](Value *V) {
3200 const APInt *C;
3201 return match(V, m_APInt(C)) && !C->isZero();
3202 });
3203}
3204
// NOTE(review): the first signature line (original 3205) is elided in this
// dump; presumably: static Value *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN, ...).
// Returns a GEP cloned into GEP's block that can replace the PHI operand when
// all incoming GEPs are identical, or differ in at most one operand.
3206 IRBuilderBase &Builder) {
3207 auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
3208 if (!Op1)
3209 return nullptr;
3210
3211 // Don't fold a GEP into itself through a PHI node. This can only happen
3212 // through the back-edge of a loop. Folding a GEP into itself means that
3213 // the value of the previous iteration needs to be stored in the meantime,
3214 // thus requiring an additional register variable to be live, but not
3215 // actually achieving anything (the GEP still needs to be executed once per
3216 // loop iteration).
3217 if (Op1 == &GEP)
3218 return nullptr;
// Accumulate the intersection of nowrap flags across all incoming GEPs.
3219 GEPNoWrapFlags NW = Op1->getNoWrapFlags();
3220
// DI is the operand index at which the incoming GEPs differ; -1 means all
// incoming GEPs are identical so far.
3221 int DI = -1;
3222
3223 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
3224 auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
3225 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
3226 Op1->getSourceElementType() != Op2->getSourceElementType())
3227 return nullptr;
3228
3229 // As for Op1 above, don't try to fold a GEP into itself.
3230 if (Op2 == &GEP)
3231 return nullptr;
3232
3233 // Keep track of the type as we walk the GEP.
3234 Type *CurTy = nullptr;
3235
3236 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
3237 if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
3238 return nullptr;
3239
3240 if (Op1->getOperand(J) != Op2->getOperand(J)) {
3241 if (DI == -1) {
3242 // We have not seen any differences yet in the GEPs feeding the
3243 // PHI yet, so we record this one if it is allowed to be a
3244 // variable.
3245
3246 // The first two arguments can vary for any GEP, the rest have to be
3247 // static for struct slots
3248 if (J > 1) {
3249 assert(CurTy && "No current type?");
3250 if (CurTy->isStructTy())
3251 return nullptr;
3252 }
3253
3254 DI = J;
3255 } else {
3256 // The GEP is different by more than one input. While this could be
3257 // extended to support GEPs that vary by more than one variable it
3258 // doesn't make sense since it greatly increases the complexity and
3259 // would result in an R+R+R addressing mode which no backend
3260 // directly supports and would need to be broken into several
3261 // simpler instructions anyway.
3262 return nullptr;
3263 }
3264 }
3265
3266 // Sink down a layer of the type for the next iteration.
3267 if (J > 0) {
3268 if (J == 1) {
3269 CurTy = Op1->getSourceElementType();
3270 } else {
3271 CurTy =
3272 GetElementPtrInst::getTypeAtIndex(CurTy, Op1->getOperand(J));
3273 }
3274 }
3275 }
3276
3277 NW &= Op2->getNoWrapFlags();
3278 }
3279
3280 // If not all GEPs are identical we'll have to create a new PHI node.
3281 // Check that the old PHI node has only one use so that it will get
3282 // removed.
3283 if (DI != -1 && !PN->hasOneUse())
3284 return nullptr;
3285
// Clone one incoming GEP; the intersected flags are valid for all paths.
3286 auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
3287 NewGEP->setNoWrapFlags(NW);
3288
3289 if (DI == -1) {
3290 // All the GEPs feeding the PHI are identical. Clone one down into our
3291 // BB so that it can be merged with the current GEP.
3292 } else {
3293 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
3294 // into the current block so it can be merged, and create a new PHI to
3295 // set that index.
3296 PHINode *NewPN;
3297 {
3298 IRBuilderBase::InsertPointGuard Guard(Builder);
3299 Builder.SetInsertPoint(PN);
3300 NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(),
3301 PN->getNumOperands());
3302 }
3303
3304 for (auto &I : PN->operands())
3305 NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
3306 PN->getIncomingBlock(I));
3307
3308 NewGEP->setOperand(DI, NewPN);
3309 }
3310
// Place the clone in GEP's block so the subsequent GEP-of-GEP fold can fire.
3311 NewGEP->insertBefore(*GEP.getParent(), GEP.getParent()->getFirstInsertionPt());
3312 return NewGEP;
3313}
3314
3316 Value *PtrOp = GEP.getOperand(0);
3317 SmallVector<Value *, 8> Indices(GEP.indices());
3318 Type *GEPType = GEP.getType();
3319 Type *GEPEltType = GEP.getSourceElementType();
3320 if (Value *V =
3321 simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.getNoWrapFlags(),
3322 SQ.getWithInstruction(&GEP)))
3323 return replaceInstUsesWith(GEP, V);
3324
3325 // For vector geps, use the generic demanded vector support.
3326 // Skip if GEP return type is scalable. The number of elements is unknown at
3327 // compile-time.
3328 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
3329 auto VWidth = GEPFVTy->getNumElements();
3330 APInt PoisonElts(VWidth, 0);
3331 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
3332 if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
3333 PoisonElts)) {
3334 if (V != &GEP)
3335 return replaceInstUsesWith(GEP, V);
3336 return &GEP;
3337 }
3338 }
3339
3340 // Eliminate unneeded casts for indices, and replace indices which displace
3341 // by multiples of a zero size type with zero.
3342 bool MadeChange = false;
3343
3344 // Index width may not be the same width as pointer width.
3345 // Data layout chooses the right type based on supported integer types.
3346 Type *NewScalarIndexTy =
3347 DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
3348
3350 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
3351 ++I, ++GTI) {
3352 // Skip indices into struct types.
3353 if (GTI.isStruct())
3354 continue;
3355
3356 Type *IndexTy = (*I)->getType();
3357 Type *NewIndexType =
3358 IndexTy->isVectorTy()
3359 ? VectorType::get(NewScalarIndexTy,
3360 cast<VectorType>(IndexTy)->getElementCount())
3361 : NewScalarIndexTy;
3362
3363 // If the element type has zero size then any index over it is equivalent
3364 // to an index of zero, so replace it with zero if it is not zero already.
3365 Type *EltTy = GTI.getIndexedType();
3366 if (EltTy->isSized() && DL.getTypeAllocSize(EltTy).isZero())
3367 if (!isa<Constant>(*I) || !match(I->get(), m_Zero())) {
3368 *I = Constant::getNullValue(NewIndexType);
3369 MadeChange = true;
3370 }
3371
3372 if (IndexTy != NewIndexType) {
3373 // If we are using a wider index than needed for this platform, shrink
3374 // it to what we need. If narrower, sign-extend it to what we need.
3375 // This explicit cast can make subsequent optimizations more obvious.
3376 if (IndexTy->getScalarSizeInBits() <
3377 NewIndexType->getScalarSizeInBits()) {
3378 if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
3379 *I = Builder.CreateZExt(*I, NewIndexType, "", /*IsNonNeg=*/true);
3380 else
3381 *I = Builder.CreateSExt(*I, NewIndexType);
3382 } else {
3383 *I = Builder.CreateTrunc(*I, NewIndexType, "", GEP.hasNoUnsignedWrap(),
3384 GEP.hasNoUnsignedSignedWrap());
3385 }
3386 MadeChange = true;
3387 }
3388 }
3389 if (MadeChange)
3390 return &GEP;
3391
3392 // Canonicalize constant GEPs to i8 type.
3393 if (!GEPEltType->isIntegerTy(8) && GEP.hasAllConstantIndices()) {
3394 APInt Offset(DL.getIndexTypeSizeInBits(GEPType), 0);
3395 if (GEP.accumulateConstantOffset(DL, Offset))
3396 return replaceInstUsesWith(
3397 GEP, Builder.CreatePtrAdd(PtrOp, Builder.getInt(Offset), "",
3398 GEP.getNoWrapFlags()));
3399 }
3400
3402 Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
3403 Value *NewGEP =
3404 Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags());
3405 return replaceInstUsesWith(GEP, NewGEP);
3406 }
3407
3408 // Strip trailing zero indices.
3409 auto *LastIdx = dyn_cast<Constant>(Indices.back());
3410 if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
3411 return replaceInstUsesWith(
3412 GEP, Builder.CreateGEP(GEP.getSourceElementType(), PtrOp,
3413 drop_end(Indices), "", GEP.getNoWrapFlags()));
3414 }
3415
3416 // Strip leading zero indices.
3417 auto *FirstIdx = dyn_cast<Constant>(Indices.front());
3418 if (FirstIdx && FirstIdx->isNullValue() &&
3419 !FirstIdx->getType()->isVectorTy()) {
3421 ++GTI;
3422 if (!GTI.isStruct() && GTI.getSequentialElementStride(DL) ==
3423 DL.getTypeAllocSize(GTI.getIndexedType()))
3424 return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
3425 GEP.getPointerOperand(),
3426 drop_begin(Indices), "",
3427 GEP.getNoWrapFlags()));
3428 }
3429
3430 // Scalarize vector operands; prefer splat-of-gep.as canonical form.
3431 // Note that this looses information about undef lanes; we run it after
3432 // demanded bits to partially mitigate that loss.
3433 if (GEPType->isVectorTy() && llvm::any_of(GEP.operands(), [](Value *Op) {
3434 return Op->getType()->isVectorTy() && getSplatValue(Op);
3435 })) {
3436 SmallVector<Value *> NewOps;
3437 for (auto &Op : GEP.operands()) {
3438 if (Op->getType()->isVectorTy())
3439 if (Value *Scalar = getSplatValue(Op)) {
3440 NewOps.push_back(Scalar);
3441 continue;
3442 }
3443 NewOps.push_back(Op);
3444 }
3445
3446 Value *Res = Builder.CreateGEP(GEP.getSourceElementType(), NewOps[0],
3447 ArrayRef(NewOps).drop_front(), GEP.getName(),
3448 GEP.getNoWrapFlags());
3449 if (!Res->getType()->isVectorTy()) {
3450 ElementCount EC = cast<VectorType>(GEPType)->getElementCount();
3451 Res = Builder.CreateVectorSplat(EC, Res);
3452 }
3453 return replaceInstUsesWith(GEP, Res);
3454 }
3455
3456 bool SeenNonZeroIndex = false;
3457 for (auto [IdxNum, Idx] : enumerate(Indices)) {
3458 // Ignore one leading zero index.
3459 auto *C = dyn_cast<Constant>(Idx);
3460 if (C && C->isNullValue() && IdxNum == 0)
3461 continue;
3462
3463 if (!SeenNonZeroIndex) {
3464 SeenNonZeroIndex = true;
3465 continue;
3466 }
3467
3468 // GEP has multiple non-zero indices: Split it.
3469 ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
3470 Value *FrontGEP =
3471 Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,
3472 GEP.getName() + ".split", GEP.getNoWrapFlags());
3473
3474 SmallVector<Value *> BackIndices;
3475 BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy));
3476 append_range(BackIndices, drop_begin(Indices, IdxNum));
3478 GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP,
3479 BackIndices, GEP.getNoWrapFlags());
3480 }
3481
3482 // Canonicalize gep %T to gep [sizeof(%T) x i8]:
3483 auto IsCanonicalType = [](Type *Ty) {
3484 if (auto *AT = dyn_cast<ArrayType>(Ty))
3485 Ty = AT->getElementType();
3486 return Ty->isIntegerTy(8);
3487 };
3488 if (Indices.size() == 1 && !IsCanonicalType(GEPEltType)) {
3489 TypeSize Scale = DL.getTypeAllocSize(GEPEltType);
3490 assert(!Scale.isScalable() && "Should have been handled earlier");
3491 Type *NewElemTy = Builder.getInt8Ty();
3492 if (Scale.getFixedValue() != 1)
3493 NewElemTy = ArrayType::get(NewElemTy, Scale.getFixedValue());
3494 GEP.setSourceElementType(NewElemTy);
3495 GEP.setResultElementType(NewElemTy);
3496 // Don't bother revisiting the GEP after this change.
3497 MadeIRChange = true;
3498 }
3499
3500 // Check to see if the inputs to the PHI node are getelementptr instructions.
3501 if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
3502 if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
3503 return replaceOperand(GEP, 0, NewPtrOp);
3504 }
3505
3506 if (auto *Src = dyn_cast<GEPOperator>(PtrOp))
3507 if (Instruction *I = visitGEPOfGEP(GEP, Src))
3508 return I;
3509
3510 if (GEP.getNumIndices() == 1) {
3511 unsigned AS = GEP.getPointerAddressSpace();
3512 if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
3513 DL.getIndexSizeInBits(AS)) {
3514 uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
3515
3516 if (TyAllocSize == 1) {
3517 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
3518 // but only if the result pointer is only used as if it were an integer.
3519 // (The case where the underlying object is the same is handled by
3520 // InstSimplify.)
3521 Value *X = GEP.getPointerOperand();
3522 Value *Y;
3523 if (match(GEP.getOperand(1), m_Sub(m_PtrToIntOrAddr(m_Value(Y)),
3525 GEPType == Y->getType()) {
3526 bool HasNonAddressBits =
3527 DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
3528 bool Changed = GEP.replaceUsesWithIf(Y, [&](Use &U) {
3529 return isa<PtrToAddrInst, ICmpInst>(U.getUser()) ||
3530 (!HasNonAddressBits && isa<PtrToIntInst>(U.getUser()));
3531 });
3532 return Changed ? &GEP : nullptr;
3533 }
3534 } else if (auto *ExactIns =
3535 dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
3536 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
3537 Value *V;
3538 if (ExactIns->isExact()) {
3539 if ((has_single_bit(TyAllocSize) &&
3540 match(GEP.getOperand(1),
3541 m_Shr(m_Value(V),
3542 m_SpecificInt(countr_zero(TyAllocSize))))) ||
3543 match(GEP.getOperand(1),
3544 m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
3545 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3546 GEP.getPointerOperand(), V,
3547 GEP.getNoWrapFlags());
3548 }
3549 }
3550 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
3551 // Try to canonicalize non-i8 element type to i8 if the index is an
3552 // exact instruction. If the index is an exact instruction (div/shr)
3553 // with a constant RHS, we can fold the non-i8 element scale into the
3554 // div/shr (similiar to the mul case, just inverted).
3555 const APInt *C;
3556 std::optional<APInt> NewC;
3557 if (has_single_bit(TyAllocSize) &&
3558 match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
3559 C->uge(countr_zero(TyAllocSize)))
3560 NewC = *C - countr_zero(TyAllocSize);
3561 else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
3562 APInt Quot;
3563 uint64_t Rem;
3564 APInt::udivrem(*C, TyAllocSize, Quot, Rem);
3565 if (Rem == 0)
3566 NewC = Quot;
3567 } else if (match(ExactIns, m_SDiv(m_Value(V), m_APInt(C)))) {
3568 APInt Quot;
3569 int64_t Rem;
3570 APInt::sdivrem(*C, TyAllocSize, Quot, Rem);
3571 // For sdiv we need to make sure we arent creating INT_MIN / -1.
3572 if (!Quot.isAllOnes() && Rem == 0)
3573 NewC = Quot;
3574 }
3575
3576 if (NewC.has_value()) {
3577 Value *NewOp = Builder.CreateBinOp(
3578 static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), V,
3579 ConstantInt::get(V->getType(), *NewC));
3580 cast<BinaryOperator>(NewOp)->setIsExact();
3581 return GetElementPtrInst::Create(Builder.getInt8Ty(),
3582 GEP.getPointerOperand(), NewOp,
3583 GEP.getNoWrapFlags());
3584 }
3585 }
3586 }
3587 }
3588 }
3589 // We do not handle pointer-vector geps here.
3590 if (GEPType->isVectorTy())
3591 return nullptr;
3592
3593 if (!GEP.isInBounds()) {
3594 unsigned IdxWidth =
3595 DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
3596 APInt BasePtrOffset(IdxWidth, 0);
3597 Value *UnderlyingPtrOp =
3598 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset);
3599 bool CanBeNull, CanBeFreed;
3600 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3601 DL, CanBeNull, CanBeFreed);
3602 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
3603 if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
3604 BasePtrOffset.isNonNegative()) {
3605 APInt AllocSize(IdxWidth, DerefBytes);
3606 if (BasePtrOffset.ule(AllocSize)) {
3608 GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
3609 }
3610 }
3611 }
3612 }
3613
3614 // nusw + nneg -> nuw
3615 if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
3616 all_of(GEP.indices(), [&](Value *Idx) {
3617 return isKnownNonNegative(Idx, SQ.getWithInstruction(&GEP));
3618 })) {
3619 GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
3620 return &GEP;
3621 }
3622
3623 // These rewrites are trying to preserve inbounds/nuw attributes. So we want
3624 // to do this after having tried to derive "nuw" above.
3625 if (GEP.getNumIndices() == 1) {
3626 // Given (gep p, x+y) we want to determine the common nowrap flags for both
3627 // geps if transforming into (gep (gep p, x), y).
3628 auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
3629 // We can preserve both "inbounds nuw", "nusw nuw" and "nuw" if we know
3630 // that x + y does not have unsigned wrap.
3631 if (GEP.hasNoUnsignedWrap() && AddIsNUW)
3632 return GEP.getNoWrapFlags();
3633 return GEPNoWrapFlags::none();
3634 };
3635
3636 // Try to replace ADD + GEP with GEP + GEP.
3637 Value *Idx1, *Idx2;
3638 if (match(GEP.getOperand(1),
3639 m_OneUse(m_AddLike(m_Value(Idx1), m_Value(Idx2))))) {
3640 // %idx = add i64 %idx1, %idx2
3641 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3642 // as:
3643 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3644 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3645 bool NUW = match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()));
3646 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3647 auto *NewPtr =
3648 Builder.CreateGEP(GEP.getSourceElementType(), GEP.getPointerOperand(),
3649 Idx1, "", NWFlags);
3650 return replaceInstUsesWith(GEP,
3651 Builder.CreateGEP(GEP.getSourceElementType(),
3652 NewPtr, Idx2, "", NWFlags));
3653 }
3654 ConstantInt *C;
3655 if (match(GEP.getOperand(1), m_OneUse(m_SExtLike(m_OneUse(m_NSWAddLike(
3656 m_Value(Idx1), m_ConstantInt(C))))))) {
3657 // %add = add nsw i32 %idx1, idx2
3658 // %sidx = sext i32 %add to i64
3659 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3660 // as:
3661 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3662 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
3663 bool NUW = match(GEP.getOperand(1),
3665 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3666 auto *NewPtr = Builder.CreateGEP(
3667 GEP.getSourceElementType(), GEP.getPointerOperand(),
3668 Builder.CreateSExt(Idx1, GEP.getOperand(1)->getType()), "", NWFlags);
3669 return replaceInstUsesWith(
3670 GEP,
3671 Builder.CreateGEP(GEP.getSourceElementType(), NewPtr,
3672 Builder.CreateSExt(C, GEP.getOperand(1)->getType()),
3673 "", NWFlags));
3674 }
3675 }
3676
3678 return R;
3679
3680 // srem -> (and/urem) for inbounds+nuw GEP
3681 if (Indices.size() == 1 && GEP.isInBounds() && GEP.hasNoUnsignedWrap()) {
3682 Value *X, *Y;
3683
3684 // Match: idx = srem X, Y -- where Y is a power-of-two value.
3685 if (match(Indices[0], m_OneUse(m_SRem(m_Value(X), m_Value(Y)))) &&
3686 isKnownToBeAPowerOfTwo(Y, /*OrZero=*/true, &GEP)) {
3687 // If GEP is inbounds+nuw, the offset cannot be negative
3688 // -> srem by power-of-two can be treated as urem,
3689 // and urem by power-of-two folds to 'and' later.
3690 // OrZero=true is fine here because division by zero is UB.
3691 Instruction *OldIdxI = cast<Instruction>(Indices[0]);
3692 Value *NewIdx = Builder.CreateURem(X, Y, OldIdxI->getName());
3693
3694 return GetElementPtrInst::Create(GEPEltType, PtrOp, {NewIdx},
3695 GEP.getNoWrapFlags());
3696 }
3697 }
3698
3699 return nullptr;
3700}
3701
// Decide whether value V can never compare equal to the unescaped
// allocation AI (used below when folding icmp's against an allocation).
// NOTE(review): this is a Doxygen-rendered extraction; the opening line of
// the signature (original line 3702, presumably declaring V and TLI) and the
// guard at line 3704 that precedes the `return true` were lost -- confirm
// against the upstream file.
3703 Instruction *AI) {
3705 return true;
// A value loaded from a global variable cannot be the address of a
// (non-escaped) allocation.
3706 if (auto *LI = dyn_cast<LoadInst>(V))
3707 return isa<GlobalVariable>(LI->getPointerOperand());
3708 // Two distinct allocations will never be equal.
3709 return isAllocLikeFn(V, &TLI) && V != AI;
3710}
3711
3712/// Given a call CB which uses an address UsedV, return true if we can prove the
3713/// call's only possible effect is storing to V.
3714static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3715 const TargetLibraryInfo &TLI) {
3716 if (!CB.use_empty())
3717 // TODO: add recursion if returned attribute is present
3718 return false;
3719
3720 if (CB.isTerminator())
3721 // TODO: remove implementation restriction
3722 return false;
3723
3724 if (!CB.willReturn() || !CB.doesNotThrow())
3725 return false;
3726
3727 // If the only possible side effect of the call is writing to the alloca,
3728 // and the result isn't used, we can safely remove any reads implied by the
3729 // call including those which might read the alloca itself.
3730 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
3731 return Dest && Dest->Ptr == UsedV;
3732}
3733
// Walk all (transitive) users of the allocation AI and decide whether the
// allocation site is removable. On success, returns the combined ModRefInfo
// access observed for the allocation and records in `Users` every
// instruction that must be rewritten/erased alongside it; returns
// std::nullopt the moment any user cannot be handled.
// NOTE(review): Doxygen-extraction gaps -- the line carrying the function
// name and leading parameters (3735), the initialization of the worklist /
// `Access` accumulator (3737, 3740), and several `cast`/`dyn_cast` lines
// defining `I`, `II`, `MI`, `SI`, `LI` (3745, 3794, 3802, 3845, 3853, 3858,
// 3869, 3878) are missing here; confirm against the upstream source.
3734 static std::optional<ModRefInfo>
3736 const TargetLibraryInfo &TLI, bool KnowInit) {
3738 const std::optional<StringRef> Family = getAllocationFamily(AI, &TLI);
3739 Worklist.push_back(AI);
3741
3742 do {
3743 Instruction *PI = Worklist.pop_back_val();
3744 for (User *U : PI->users()) {
3746 switch (I->getOpcode()) {
3747 default:
3748 // Give up the moment we see something we can't handle.
3749 return std::nullopt;
3750
// Address-preserving users: record them and keep chasing their users.
3751 case Instruction::AddrSpaceCast:
3752 case Instruction::BitCast:
3753 case Instruction::GetElementPtr:
3754 Users.emplace_back(I);
3755 Worklist.push_back(I);
3756 continue;
3757
3758 case Instruction::ICmp: {
3759 ICmpInst *ICI = cast<ICmpInst>(I);
3760 // We can fold eq/ne comparisons with null to false/true, respectively.
3761 // We also fold comparisons in some conditions provided the alloc has
3762 // not escaped (see isNeverEqualToUnescapedAlloc).
3763 if (!ICI->isEquality())
3764 return std::nullopt;
3765 unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
3766 if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
3767 return std::nullopt;
3768
3769 // Do not fold compares to aligned_alloc calls, as they may have to
3770 // return null in case the required alignment cannot be satisfied,
3771 // unless we can prove that both alignment and size are valid.
3772 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3773 // Check if alignment and size of a call to aligned_alloc is valid,
3774 // that is alignment is a power-of-2 and the size is a multiple of the
3775 // alignment.
3776 const APInt *Alignment;
3777 const APInt *Size;
3778 return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
3779 match(CB->getArgOperand(1), m_APInt(Size)) &&
3780 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
3781 };
3782 auto *CB = dyn_cast<CallBase>(AI);
3783 LibFunc TheLibFunc;
3784 if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
3785 TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3786 !AlignmentAndSizeKnownValid(CB))
3787 return std::nullopt;
3788 Users.emplace_back(I);
3789 continue;
3790 }
3791
3792 case Instruction::Call:
3793 // Ignore no-op and store intrinsics.
3795 switch (II->getIntrinsicID()) {
3796 default:
3797 return std::nullopt;
3798
3799 case Intrinsic::memmove:
3800 case Intrinsic::memcpy:
3801 case Intrinsic::memset: {
3803 if (MI->isVolatile())
3804 return std::nullopt;
3805 // Note: this could also be ModRef, but we can still interpret that
3806 // as just Mod in that case.
3807 ModRefInfo NewAccess =
3808 MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
// Reject a second, conflicting kind of access to the allocation.
3809 if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
3810 return std::nullopt;
3811 Access |= NewAccess;
3812 [[fallthrough]];
3813 }
3814 case Intrinsic::assume:
3815 case Intrinsic::invariant_start:
3816 case Intrinsic::invariant_end:
3817 case Intrinsic::lifetime_start:
3818 case Intrinsic::lifetime_end:
3819 case Intrinsic::objectsize:
3820 Users.emplace_back(I);
3821 continue;
3822 case Intrinsic::launder_invariant_group:
3823 case Intrinsic::strip_invariant_group:
3824 Users.emplace_back(I);
3825 Worklist.push_back(I);
3826 continue;
3827 }
3828 }
3829
// A matching-family free of this allocation is itself removable.
3830 if (Family && getFreedOperand(cast<CallBase>(I), &TLI) == PI &&
3831 getAllocationFamily(I, &TLI) == Family) {
3832 Users.emplace_back(I);
3833 continue;
3834 }
3835
// A matching-family realloc forwards the pointer; keep chasing its users.
3836 if (Family && getReallocatedOperand(cast<CallBase>(I)) == PI &&
3837 getAllocationFamily(I, &TLI) == Family) {
3838 Users.emplace_back(I);
3839 Worklist.push_back(I);
3840 continue;
3841 }
3842
3843 if (!isRefSet(Access) &&
3844 isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
3846 Users.emplace_back(I);
3847 continue;
3848 }
3849
3850 return std::nullopt;
3851
3852 case Instruction::Store: {
3854 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3855 return std::nullopt;
// A store is only removable if nothing reads the allocation.
3856 if (isRefSet(Access))
3857 return std::nullopt;
3859 Users.emplace_back(I);
3860 continue;
3861 }
3862
3863 case Instruction::Load: {
3864 LoadInst *LI = cast<LoadInst>(I);
3865 if (LI->isVolatile() || LI->getPointerOperand() != PI)
3866 return std::nullopt;
// A load is only foldable if nothing writes the allocation.
3867 if (isModSet(Access))
3868 return std::nullopt;
3870 Users.emplace_back(I);
3871 continue;
3872 }
3873 }
3874 llvm_unreachable("missing a return?");
3875 }
3876 } while (!Worklist.empty());
3877
3879 return Access;
3880}
3881
// Fold an allocation site `MI` (malloc-like call or alloca) that is only
// used by removable instructions: lower @llvm.objectsize users, rewrite
// memcpy-from-known-zero into memset, fold null comparisons to constants,
// fold loads to the known initial value, and finally erase the allocation.
// NOTE(review): Doxygen-extraction gaps -- the signature lines (3882-3883,
// presumably InstCombinerImpl::visitAllocSite(Instruction &MI)), the
// declarations of `Users`/`DVRs` (3895, 3899), the
// getInitialValueOfAllocation call (3911), and the `cast<Instruction>`
// lines defining `I`/`II` in the loops below (3934-3936, 3950, 3965, 3968,
// 3974, 3978, 3985, 3987, 3990) are missing here; confirm upstream.
3884
3885 // If we have a malloc call which is only used in any amount of comparisons to
3886 // null and free calls, delete the calls and replace the comparisons with true
3887 // or false as appropriate.
3888
3889 // This is based on the principle that we can substitute our own allocation
3890 // function (which will never return null) rather than knowledge of the
3891 // specific function being called. In some sense this can change the permitted
3892 // outputs of a program (when we convert a malloc to an alloca, the fact that
3893 // the allocation is now on the stack is potentially visible, for example),
3894 // but we believe in a permissible manner.
3896
3897 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3898 // before each store.
3900 std::unique_ptr<DIBuilder> DIB;
3901 if (isa<AllocaInst>(MI)) {
3902 findDbgUsers(&MI, DVRs);
3903 DIB.reset(new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3904 }
3905
3906 // Determine what getInitialValueOfAllocation would return without actually
3907 // allocating the result.
3908 bool KnowInitUndef = false;
3909 bool KnowInitZero = false;
3910 Constant *Init =
3912 if (Init) {
3913 if (isa<UndefValue>(Init))
3914 KnowInitUndef = true;
3915 else if (Init->isNullValue())
3916 KnowInitZero = true;
3917 }
3918 // The various sanitizers don't actually return undef memory, but rather
3919 // memory initialized with special forms of runtime poison
3920 auto &F = *MI.getFunction();
3921 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
3922 F.hasFnAttribute(Attribute::SanitizeAddress))
3923 KnowInitUndef = false;
3924
3925 auto Removable =
3926 isAllocSiteRemovable(&MI, Users, TLI, KnowInitZero | KnowInitUndef);
3927 if (Removable) {
// First pass: lower objectsize/memtransfer users before touching the
// casts/GEPs they may reference.
3928 for (WeakTrackingVH &User : Users) {
3929 // Lowering all @llvm.objectsize and MTI calls first because they may use
3930 // a bitcast/GEP of the alloca we are removing.
3931 if (!User)
3932 continue;
3933
3935
3937 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3938 SmallVector<Instruction *> InsertedInstructions;
3939 Value *Result = lowerObjectSizeCall(
3940 II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
3941 for (Instruction *Inserted : InsertedInstructions)
3942 Worklist.add(Inserted);
3943 replaceInstUsesWith(*I, Result);
3945 User = nullptr; // Skip examining in the next loop.
3946 continue;
3947 }
3948 if (auto *MTI = dyn_cast<MemTransferInst>(I)) {
// Copy-from-zero-initialized memory is equivalent to a memset of 0.
3949 if (KnowInitZero && isRefSet(*Removable)) {
3951 Builder.SetInsertPoint(MTI);
3952 auto *M = Builder.CreateMemSet(
3953 MTI->getRawDest(),
3954 ConstantInt::get(Type::getInt8Ty(MI.getContext()), 0),
3955 MTI->getLength(), MTI->getDestAlign());
3956 M->copyMetadata(*MTI);
3957 }
3958 }
3959 }
3960 }
// Second pass: replace/erase the remaining recorded users.
3961 for (WeakTrackingVH &User : Users) {
3962 if (!User)
3963 continue;
3964
3966
3967 if (ICmpInst *C = dyn_cast<ICmpInst>(I)) {
// eq-null folds to false, ne-null folds to true (alloc is non-null).
3969 ConstantInt::get(Type::getInt1Ty(C->getContext()),
3970 C->isFalseWhenEqual()));
3971 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
3972 for (auto *DVR : DVRs)
3973 if (DVR->isAddressOfVariable())
3975 } else {
3976 // Casts, GEP, or anything else: we're about to delete this instruction,
3977 // so it can not have any valid uses.
3979 if (isa<LoadInst>(I)) {
3980 assert(KnowInitZero || KnowInitUndef);
3981 Replace = KnowInitUndef ? UndefValue::get(I->getType())
3982 : Constant::getNullValue(I->getType());
3983 } else
3984 Replace = PoisonValue::get(I->getType());
3986 }
3988 }
3989
3991 // Replace invoke with a NOP intrinsic to maintain the original CFG
3992 Module *M = II->getModule();
3993 Function *F = Intrinsic::getOrInsertDeclaration(M, Intrinsic::donothing);
3994 auto *NewII = InvokeInst::Create(
3995 F, II->getNormalDest(), II->getUnwindDest(), {}, "", II->getParent());
3996 NewII->setDebugLoc(II->getDebugLoc());
3997 }
3998
3999 // Remove debug intrinsics which describe the value contained within the
4000 // alloca. In addition to removing dbg.{declare,addr} which simply point to
4001 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
4002 //
4003 // ```
4004 // define void @foo(i32 %0) {
4005 // %a = alloca i32 ; Deleted.
4006 // store i32 %0, i32* %a
4007 // dbg.value(i32 %0, "arg0") ; Not deleted.
4008 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
4009 // call void @trivially_inlinable_no_op(i32* %a)
4010 // ret void
4011 // }
4012 // ```
4013 //
4014 // This may not be required if we stop describing the contents of allocas
4015 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
4016 // the LowerDbgDeclare utility.
4017 //
4018 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
4019 // "arg0" dbg.value may be stale after the call. However, failing to remove
4020 // the DW_OP_deref dbg.value causes large gaps in location coverage.
4021 //
4022 // FIXME: the Assignment Tracking project has now likely made this
4023 // redundant (and it's sometimes harmful).
4024 for (auto *DVR : DVRs)
4025 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
4026 DVR->eraseFromParent();
4027
4028 return eraseInstFromFunction(MI);
4029 }
4030 return nullptr;
4031}
4032
// NOTE(review): Doxygen-extraction gaps -- the first line of this signature
// (original line 4048, presumably
// "static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,") and the
// matcher fragments at lines 4075 (the debug/pseudo-instruction check) and
// 4087 (the pointer operand pattern inside m_ICmp) are missing; confirm
// against the upstream file.
4033 /// Move the call to free before a NULL test.
4034 ///
4035 /// Check if this free is accessed after its argument has been test
4036 /// against NULL (property 0).
4037 /// If yes, it is legal to move this call in its predecessor block.
4038 ///
4039 /// The move is performed only if the block containing the call to free
4040 /// will be removed, i.e.:
4041 /// 1. it has only one predecessor P, and P has two successors
4042 /// 2. it contains the call, noops, and an unconditional branch
4043 /// 3. its successor is the same as its predecessor's successor
4044 ///
4045 /// The profitability is out-of concern here and this function should
4046 /// be called only if the caller knows this transformation would be
4047 /// profitable (e.g., for code size).
4049 const DataLayout &DL) {
4050 Value *Op = FI.getArgOperand(0);
4051 BasicBlock *FreeInstrBB = FI.getParent();
4052 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
4053
4054 // Validate part of constraint #1: Only one predecessor
4055 // FIXME: We can extend the number of predecessor, but in that case, we
4056 // would duplicate the call to free in each predecessor and it may
4057 // not be profitable even for code size.
4058 if (!PredBB)
4059 return nullptr;
4060
4061 // Validate constraint #2: Does this block contains only the call to
4062 // free, noops, and an unconditional branch?
4063 BasicBlock *SuccBB;
4064 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
4065 if (!match(FreeInstrBBTerminator, m_UnconditionalBr(SuccBB)))
4066 return nullptr;
4067
4068 // If there are only 2 instructions in the block, at this point,
4069 // this is the call to free and unconditional.
4070 // If there are more than 2 instructions, check that they are noops
4071 // i.e., they won't hurt the performance of the generated code.
4072 if (FreeInstrBB->size() != 2) {
4073 for (const Instruction &Inst : *FreeInstrBB) {
4074 if (&Inst == &FI || &Inst == FreeInstrBBTerminator ||
4076 continue;
// Only no-op casts are tolerated besides the free and the branch.
4077 auto *Cast = dyn_cast<CastInst>(&Inst);
4078 if (!Cast || !Cast->isNoopCast(DL))
4079 return nullptr;
4080 }
4081 }
4082 // Validate the rest of constraint #1 by matching on the pred branch.
4083 Instruction *TI = PredBB->getTerminator();
4084 BasicBlock *TrueBB, *FalseBB;
4085 CmpPredicate Pred;
4086 if (!match(TI, m_Br(m_ICmp(Pred,
4088 m_Specific(Op->stripPointerCasts())),
4089 m_Zero()),
4090 TrueBB, FalseBB)))
4091 return nullptr;
4092 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
4093 return nullptr;
4094
4095 // Validate constraint #3: Ensure the null case just falls through.
4096 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
4097 return nullptr;
4098 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
4099 "Broken CFG: missing edge from predecessor to successor");
4100
4101 // At this point, we know that everything in FreeInstrBB can be moved
4102 // before TI.
4103 for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
4104 if (&Instr == FreeInstrBBTerminator)
4105 break;
4106 Instr.moveBeforePreserving(TI->getIterator());
4107 }
4108 assert(FreeInstrBB->size() == 1 &&
4109 "Only the branch instruction should remain");
4110
4111 // Now that we've moved the call to free before the NULL check, we have to
4112 // remove any attributes on its parameter that imply it's non-null, because
4113 // those attributes might have only been valid because of the NULL check, and
4114 // we can get miscompiles if we keep them. This is conservative if non-null is
4115 // also implied by something other than the NULL check, but it's guaranteed to
4116 // be correct, and the conservativeness won't matter in practice, since the
4117 // attributes are irrelevant for the call to free itself and the pointer
4118 // shouldn't be used after the call.
4119 AttributeList Attrs = FI.getAttributes();
4120 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
4121 Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
4122 if (Dereferenceable.isValid()) {
// dereferenceable(N) implies nonnull; weaken it to
// dereferenceable_or_null(N) now that the null check is behind us.
4123 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
4124 Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
4125 Attribute::Dereferenceable);
4126 Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
4127 }
4128 FI.setAttributes(Attrs);
4129
4130 return &FI;
4131}
4132
// Simplify a call to a free-like function: free(undef) becomes unreachable,
// free(null) is deleted, free(realloc(p)) elides the realloc, and under
// size optimization the free is hoisted above its guarding null test.
// NOTE(review): Doxygen-extraction gaps -- the signature line (4133,
// presumably InstCombinerImpl::visitFree(CallInst &FI, Value *Op)) and
// lines 4137 (the unreachable marker), 4143 (the null-pointer check), 4148
// (the dyn_cast defining CI), and 4166 (the call to
// tryToMoveFreeBeforeNullTest producing I) are missing; confirm upstream.
4134 // free undef -> unreachable.
4135 if (isa<UndefValue>(Op)) {
4136 // Leave a marker since we can't modify the CFG here.
4138 return eraseInstFromFunction(FI);
4139 }
4140
4141 // If we have 'free null' delete the instruction. This can happen in stl code
4142 // when lots of inlining happens.
4144 return eraseInstFromFunction(FI);
4145
4146 // If we had free(realloc(...)) with no intervening uses, then eliminate the
4147 // realloc() entirely.
4149 if (CI && CI->hasOneUse())
4150 if (Value *ReallocatedOp = getReallocatedOperand(CI))
4151 return eraseInstFromFunction(*replaceInstUsesWith(*CI, ReallocatedOp));
4152
4153 // If we optimize for code size, try to move the call to free before the null
4154 // test so that simplify cfg can remove the empty block and dead code
4155 // elimination the branch. I.e., helps to turn something like:
4156 // if (foo) free(foo);
4157 // into
4158 // free(foo);
4159 //
4160 // Note that we can only do this for 'free' and not for any flavor of
4161 // 'operator delete'; there is no 'operator delete' symbol for which we are
4162 // permitted to invent a call, even if we're passing in a null pointer.
4163 if (MinimizeSize) {
4164 LibFunc Func;
4165 if (TLI.getLibFunc(FI, Func) && TLI.has(Func) && Func == LibFunc_free)
4167 return I;
4168 }
4169
4170 return nullptr;
4171}
4172
// Simplify the returned value of `RI`: when the function's return is
// attributed nonnull (or dereferenceable), simplify the returned pointer
// operand; then demand only the FP classes allowed by the `nofpclass`
// return attribute.
// NOTE(review): Doxygen-extraction gaps -- the signature line (4173,
// presumably InstCombinerImpl::visitReturnInst(ReturnInst &RI)) and line
// 4185 (the remainder of the HasDereferenceable condition) are missing;
// confirm upstream.
4174 Value *RetVal = RI.getReturnValue();
4175 if (!RetVal)
4176 return nullptr;
4177
4178 Function *F = RI.getFunction();
4179 Type *RetTy = RetVal->getType();
4180 if (RetTy->isPointerTy()) {
4181 bool HasDereferenceable =
4182 F->getAttributes().getRetDereferenceableBytes() > 0;
4183 if (F->hasRetAttribute(Attribute::NonNull) ||
4184 (HasDereferenceable &&
4186 if (Value *V = simplifyNonNullOperand(RetVal, HasDereferenceable))
4187 return replaceOperand(RI, 0, V);
4188 }
4189 }
4190
// nofpclass handling only applies to FP-compatible return types.
4191 if (!AttributeFuncs::isNoFPClassCompatibleType(RetTy))
4192 return nullptr;
4193
4194 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
4195 if (ReturnClass == fcNone)
4196 return nullptr;
4197
4198 KnownFPClass KnownClass;
4199 if (SimplifyDemandedFPClass(&RI, 0, ~ReturnClass, KnownClass,
4200 SQ.getWithInstruction(&RI)))
4201 return &RI;
4202
4203 return nullptr;
4204}
4205
// Erase instructions that precede an unreachable terminator and cannot
// affect observable behavior once control provably reaches `I`.
// NOTE(review): Doxygen-extraction gaps -- the signature line (4207,
// presumably
// bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I))
// and line 4220 (the guard condition preceding the second `break`) are
// missing; confirm upstream.
4206 // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
4208 // Try to remove the previous instruction if it must lead to unreachable.
4209 // This includes instructions like stores and "llvm.assume" that may not get
4210 // removed by simple dead code elimination.
4211 bool Changed = false;
4212 while (Instruction *Prev = I.getPrevNode()) {
4213 // While we theoretically can erase EH, that would result in a block that
4214 // used to start with an EH no longer starting with EH, which is invalid.
4215 // To make it valid, we'd need to fixup predecessors to no longer refer to
4216 // this block, but that changes CFG, which is not allowed in InstCombine.
4217 if (Prev->isEHPad())
4218 break; // Can not drop any more instructions. We're done here.
4219
4221 break; // Can not drop any more instructions. We're done here.
4222 // Otherwise, this instruction can be freely erased,
4223 // even if it is not side-effect free.
4224
4225 // A value may still have uses before we process it here (for example, in
4226 // another unreachable block), so convert those to poison.
4227 replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
4228 eraseInstFromFunction(*Prev);
4229 Changed = true;
4230 }
4231 return Changed;
4232}
4233
// If the last non-debug instruction before this unconditional branch is a
// store, try to sink it into the successor block.
// NOTE(review): Doxygen-extraction gaps -- lines 4234-4237 and 4239 are
// missing here; they presumably contain visitUnreachableInst and the
// signature of visitUnconditionalBranchInst(BranchInst &BI), and line 4255
// (the merge call guarding the `return &BI`) is also absent. Confirm
// against the upstream file.
4238
4240 // If this store is the second-to-last instruction in the basic block
4241 // (excluding debug info) and if the block ends with
4242 // an unconditional branch, try to move the store to the successor block.
4243
// Walk backwards over debug/pseudo instructions to find the candidate store.
4244 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4245 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4246 do {
4247 if (BBI != FirstInstr)
4248 --BBI;
4249 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4250
4251 return dyn_cast<StoreInst>(BBI);
4252 };
4253
4254 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4256 return &BI;
4257
4258 return nullptr;
4259}
4260
// Record the CFG edge From->To as dead: replace the corresponding PHI
// inputs in To with poison and queue To for dead-block processing.
// NOTE(review): Doxygen-extraction gap -- the signature lines (4261-4262,
// presumably InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
// SmallVectorImpl<BasicBlock *> &Worklist)) are missing; confirm upstream.
4263 if (!DeadEdges.insert({From, To}).second)
4264 return;
4265
4266 // Replace phi node operands in successor with poison.
4267 for (PHINode &PN : To->phis())
4268 for (Use &U : PN.incoming_values())
4269 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(U)) {
4270 replaceUse(U, PoisonValue::get(PN.getType()));
4271 addToWorklist(&PN);
4272 MadeIRChange = true;
4273 }
4274
4275 Worklist.push_back(To);
4276}
4277
// NOTE(review): Doxygen-extraction gaps -- the signature lines (4280-4281,
// presumably InstCombinerImpl::handleUnreachableFrom(Instruction *I,
// SmallVectorImpl<BasicBlock *> &Worklist)), the erase call at 4294, and
// lines 4298-4299/4302 around the `Changed` loop are missing; confirm
// against the upstream file.
4278 // Under the assumption that I is unreachable, remove it and following
4279 // instructions. Changes are reported directly to MadeIRChange.
4282 BasicBlock *BB = I->getParent();
// Iterate bottom-up from the instruction before the terminator down to I.
4283 for (Instruction &Inst : make_early_inc_range(
4284 make_range(std::next(BB->getTerminator()->getReverseIterator()),
4285 std::next(I->getReverseIterator())))) {
4286 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
4287 replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
4288 MadeIRChange = true;
4289 }
// EH pads and token-producing instructions cannot be erased here.
4290 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
4291 continue;
4292 // RemoveDIs: erase debug-info on this instruction manually.
4293 Inst.dropDbgRecords();
4295 MadeIRChange = true;
4296 }
4297
4300 MadeIRChange = true;
4301 for (Value *V : Changed)
4303 }
4304
4305 // Handle potentially dead successors.
4306 for (BasicBlock *Succ : successors(BB))
4307 addDeadEdge(BB, Succ, Worklist);
4308}
4309
// Drain the dead-block worklist: a block is only processed once every
// incoming edge is known dead (or is dominated by the block itself,
// i.e. a self-reachable loop).
// NOTE(review): Doxygen-extraction gaps -- the signature lines (4310-4311)
// and line 4319 (the action taken on a confirmed-dead block) are missing;
// confirm against the upstream file.
4312 while (!Worklist.empty()) {
4313 BasicBlock *BB = Worklist.pop_back_val();
4314 if (!all_of(predecessors(BB), [&](BasicBlock *Pred) {
4315 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
4316 }))
4317 continue;
4318
4320 }
4321}
4322
// Mark every successor edge of BB as dead, except the edge to LiveSucc
// (which may be null when no successor is known live).
// NOTE(review): Doxygen-extraction gaps -- the first signature line (4323,
// presumably naming BB) and lines 4325/4334 (worklist declaration and the
// final processing call) are missing; confirm against the upstream file.
4324 BasicBlock *LiveSucc) {
4326 for (BasicBlock *Succ : successors(BB)) {
4327 // The live successor isn't dead.
4328 if (Succ == LiveSucc)
4329 continue;
4330
4331 addDeadEdge(BB, Succ, Worklist);
4332 }
4333
4335}
4336
// Canonicalize and simplify a conditional branch: fold a negated condition
// by swapping successors, rewrite logical-and-with-invert as
// logical-or-with-invert, drop irrelevant conditions, canonicalize fcmp
// predicates, handle undef/constant conditions via dead-successor marking,
// and replace dominated uses of the condition with true/false.
// NOTE(review): Doxygen-extraction gaps -- the signature line (4337,
// presumably InstCombinerImpl::visitBranchInst(BranchInst &BI)) and lines
// 4355/4360 inside the logical-and match are missing; confirm upstream.
4338 // Change br (not X), label True, label False to: br X, label False, True
4339 Value *Cond = BI.getCondition();
4340 Value *X;
4341 if (match(Cond, m_Not(m_Value(X))) && !isa<Constant>(X)) {
4342 // Swap Destinations and condition...
4343 BI.swapSuccessors();
4344 if (BPI)
4345 BPI->swapSuccEdgesProbabilities(BI.getParent());
4346 return replaceOperand(BI, 0, X);
4347 }
4348
4349 // Canonicalize logical-and-with-invert as logical-or-with-invert.
4350 // This is done by inverting the condition and swapping successors:
4351 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
4352 Value *Y;
4353 if (isa<SelectInst>(Cond) &&
4354 match(Cond,
4356 Value *NotX = Builder.CreateNot(X, "not." + X->getName());
4357 Value *Or = Builder.CreateLogicalOr(NotX, Y);
4358
4359 // Set weights for the new OR select instruction too.
4361 if (auto *OrInst = dyn_cast<Instruction>(Or)) {
4362 if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
4363 SmallVector<uint32_t> Weights;
4364 if (extractBranchWeights(*CondInst, Weights)) {
4365 assert(Weights.size() == 2 &&
4366 "Unexpected number of branch weights!");
// The branch is inverted, so its weights swap as well.
4367 std::swap(Weights[0], Weights[1]);
4368 setBranchWeights(*OrInst, Weights, /*IsExpected=*/false);
4369 }
4370 }
4371 }
4372 }
4373 BI.swapSuccessors();
4374 if (BPI)
4375 BPI->swapSuccEdgesProbabilities(BI.getParent());
4376 return replaceOperand(BI, 0, Or);
4377 }
4378
4379 // If the condition is irrelevant, remove the use so that other
4380 // transforms on the condition become more effective.
4381 if (!isa<ConstantInt>(Cond) && BI.getSuccessor(0) == BI.getSuccessor(1))
4382 return replaceOperand(BI, 0, ConstantInt::getFalse(Cond->getType()));
4383
4384 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
4385 CmpPredicate Pred;
4386 if (match(Cond, m_OneUse(m_FCmp(Pred, m_Value(), m_Value()))) &&
4387 !isCanonicalPredicate(Pred)) {
4388 // Swap destinations and condition.
4389 auto *Cmp = cast<CmpInst>(Cond);
4390 Cmp->setPredicate(CmpInst::getInversePredicate(Pred));
4391 BI.swapSuccessors();
4392 if (BPI)
4393 BPI->swapSuccEdgesProbabilities(BI.getParent());
4394 Worklist.push(Cmp);
4395 return &BI;
4396 }
4397
// An undef condition makes both successors potentially dead.
4398 if (isa<UndefValue>(Cond)) {
4399 handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr);
4400 return nullptr;
4401 }
// A constant condition leaves exactly one live successor.
4402 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4403 handlePotentiallyDeadSuccessors(BI.getParent(),
4404 BI.getSuccessor(!CI->getZExtValue()));
4405 return nullptr;
4406 }
4407
4408 // Replace all dominated uses of the condition with true/false
4409 // Ignore constant expressions to avoid iterating over uses on other
4410 // functions.
4411 if (!isa<Constant>(Cond) && BI.getSuccessor(0) != BI.getSuccessor(1)) {
4412 for (auto &U : make_early_inc_range(Cond->uses())) {
4413 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(0));
4414 if (DT.dominates(Edge0, U)) {
4415 replaceUse(U, ConstantInt::getTrue(Cond->getType()));
4416 addToWorklist(cast<Instruction>(U.getUser()));
4417 continue;
4418 }
4419 BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(1));
4420 if (DT.dominates(Edge1, U)) {
4421 replaceUse(U, ConstantInt::getFalse(Cond->getType()));
4422 addToWorklist(cast<Instruction>(U.getUser()));
4423 }
4424 }
4425 }
4426
4427 DC.registerBranch(&BI);
4428 return nullptr;
4429}
4430
// NOTE(review): Doxygen-extraction gaps -- the signature lines (4434-4435,
// presumably static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
// SelectInst *Select, ...)) and line 4455 (construction of the range `CR`
// tested below, presumably from Pred and *RHSC) are missing; confirm
// against the upstream file.
4431 // Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4432 // we can prove that both (switch C) and (switch X) go to the default when cond
4433 // is false/true.
4436 bool IsTrueArm) {
4437 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
4438 auto *C = dyn_cast<ConstantInt>(Select->getOperand(CstOpIdx));
4439 if (!C)
4440 return nullptr;
4441
// The constant arm must already go to the default destination.
4442 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
4443 if (CstBB != SI.getDefaultDest())
4444 return nullptr;
4445 Value *X = Select->getOperand(3 - CstOpIdx);
4446 CmpPredicate Pred;
4447 const APInt *RHSC;
4448 if (!match(Select->getCondition(),
4449 m_ICmp(Pred, m_Specific(X), m_APInt(RHSC))))
4450 return nullptr;
// When X is the false arm, the icmp holds on the arm we are NOT taking,
// so invert the predicate to describe X's range.
4451 if (IsTrueArm)
4452 Pred = ICmpInst::getInversePredicate(Pred);
4453
4454 // See whether we can replace the select with X
4456 for (auto Case : SI.cases())
4457 if (!CR.contains(Case.getCaseValue()->getValue()))
4458 return nullptr;
4459
4460 return X;
4461}
4462
// Simplify a switch: undo invertible arithmetic (add/sub/xor) on the
// condition by rewriting the case values, strip shifts and zext/sext, fold
// switch-on-select using ranges, shrink the condition width from known
// bits, and handle undef/constant conditions via dead-successor marking.
// NOTE(review): Doxygen-extraction gaps -- the signature line (4463,
// presumably InstCombinerImpl::visitSwitchInst(SwitchInst &SI)), line 4504
// (the cast defining `Shl`), and line 4590 (the
// handlePotentiallyDeadSuccessors call) are missing; confirm upstream.
4464 Value *Cond = SI.getCondition();
4465 Value *Op0;
4466 const APInt *CondOpC;
4467 using InvertFn = std::function<APInt(const APInt &Case, const APInt &C)>;
4468
// Return the case-value inversion for a recognized condition shape, or
// null if the condition is not invertible.
4469 auto MaybeInvertible = [&](Value *Cond) -> InvertFn {
4470 if (match(Cond, m_Add(m_Value(Op0), m_APInt(CondOpC))))
4471 // Change 'switch (X+C) case Case:' into 'switch (X) case Case-C'.
4472 return [](const APInt &Case, const APInt &C) { return Case - C; };
4473
4474 if (match(Cond, m_Sub(m_APInt(CondOpC), m_Value(Op0))))
4475 // Change 'switch (C-X) case Case:' into 'switch (X) case C-Case'.
4476 return [](const APInt &Case, const APInt &C) { return C - Case; };
4477
4478 if (match(Cond, m_Xor(m_Value(Op0), m_APInt(CondOpC))) &&
4479 !CondOpC->isMinSignedValue() && !CondOpC->isMaxSignedValue())
4480 // Change 'switch (X^C) case Case:' into 'switch (X) case Case^C'.
4481 // Prevent creation of large case values by excluding extremes.
4482 return [](const APInt &Case, const APInt &C) { return Case ^ C; };
4483
4484 return nullptr;
4485 };
4486
4487 // Attempt to invert and simplify the switch condition, as long as the
4488 // condition is not used further, as it may not be profitable otherwise.
4489 if (auto InvertFn = MaybeInvertible(Cond); InvertFn && Cond->hasOneUse()) {
4490 for (auto &Case : SI.cases()) {
4491 const APInt &New = InvertFn(Case.getCaseValue()->getValue(), *CondOpC);
4492 Case.setValue(ConstantInt::get(SI.getContext(), New));
4493 }
4494 return replaceOperand(SI, 0, Op0);
4495 }
4496
4497 uint64_t ShiftAmt;
4498 if (match(Cond, m_Shl(m_Value(Op0), m_ConstantInt(ShiftAmt))) &&
4499 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
4500 all_of(SI.cases(), [&](const auto &Case) {
4501 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
4502 })) {
4503 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
4505 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
4506 Shl->hasOneUse()) {
4507 Value *NewCond = Op0;
4508 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
4509 // If the shift may wrap, we need to mask off the shifted bits.
4510 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
4511 NewCond = Builder.CreateAnd(
4512 Op0, APInt::getLowBitsSet(BitWidth, BitWidth - ShiftAmt));
4513 }
4514 for (auto Case : SI.cases()) {
4515 const APInt &CaseVal = Case.getCaseValue()->getValue();
4516 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
4517 : CaseVal.lshr(ShiftAmt);
4518 Case.setValue(ConstantInt::get(SI.getContext(), ShiftedCase));
4519 }
4520 return replaceOperand(SI, 0, NewCond);
4521 }
4522 }
4523
4524 // Fold switch(zext/sext(X)) into switch(X) if possible.
4525 if (match(Cond, m_ZExtOrSExt(m_Value(Op0)))) {
4526 bool IsZExt = isa<ZExtInst>(Cond);
4527 Type *SrcTy = Op0->getType();
4528 unsigned NewWidth = SrcTy->getScalarSizeInBits();
4529
// Only legal if every case value fits in the narrow source type.
4530 if (all_of(SI.cases(), [&](const auto &Case) {
4531 const APInt &CaseVal = Case.getCaseValue()->getValue();
4532 return IsZExt ? CaseVal.isIntN(NewWidth)
4533 : CaseVal.isSignedIntN(NewWidth);
4534 })) {
4535 for (auto &Case : SI.cases()) {
4536 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4537 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4538 }
4539 return replaceOperand(SI, 0, Op0);
4540 }
4541 }
4542
4543 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
4544 if (auto *Select = dyn_cast<SelectInst>(Cond)) {
4545 if (Value *V =
4546 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
4547 return replaceOperand(SI, 0, V);
4548 if (Value *V =
4549 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
4550 return replaceOperand(SI, 0, V);
4551 }
4552
4553 KnownBits Known = computeKnownBits(Cond, &SI);
4554 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
4555 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
4556
4557 // Compute the number of leading bits we can ignore.
4558 // TODO: A better way to determine this would use ComputeNumSignBits().
4559 for (const auto &C : SI.cases()) {
4560 LeadingKnownZeros =
4561 std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
4562 LeadingKnownOnes =
4563 std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
4564 }
4565
4566 unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
4567
4568 // Shrink the condition operand if the new type is smaller than the old type.
4569 // But do not shrink to a non-standard type, because backend can't generate
4570 // good code for that yet.
4571 // TODO: We can make it aggressive again after fixing PR39569.
4572 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
4573 shouldChangeType(Known.getBitWidth(), NewWidth)) {
4574 IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
4575 Builder.SetInsertPoint(&SI);
4576 Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc");
4577
4578 for (auto Case : SI.cases()) {
4579 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
4580 Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
4581 }
4582 return replaceOperand(SI, 0, NewCond);
4583 }
4584
// An undef condition makes all successors potentially dead.
4585 if (isa<UndefValue>(Cond)) {
4586 handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr);
4587 return nullptr;
4588 }
// A constant condition leaves exactly one live case successor.
4589 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
4591 SI.findCaseValue(CI)->getCaseSuccessor());
4592 return nullptr;
4593 }
4594
4595 return nullptr;
4596}
4597
4599InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4601 if (!WO)
4602 return nullptr;
4603
4604 Intrinsic::ID OvID = WO->getIntrinsicID();
4605 const APInt *C = nullptr;
4606 if (match(WO->getRHS(), m_APIntAllowPoison(C))) {
4607 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
4608 OvID == Intrinsic::umul_with_overflow)) {
4609 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
4610 if (C->isAllOnes())
4611 return BinaryOperator::CreateNeg(WO->getLHS());
4612 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
4613 if (C->isPowerOf2()) {
4614 return BinaryOperator::CreateShl(
4615 WO->getLHS(),
4616 ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
4617 }
4618 }
4619 }
4620
4621 // We're extracting from an overflow intrinsic. See if we're the only user.
4622 // That allows us to simplify multiple result intrinsics to simpler things
4623 // that just get one value.
4624 if (!WO->hasOneUse())
4625 return nullptr;
4626
4627 // Check if we're grabbing only the result of a 'with overflow' intrinsic
4628 // and replace it with a traditional binary instruction.
4629 if (*EV.idx_begin() == 0) {
4630 Instruction::BinaryOps BinOp = WO->getBinaryOp();
4631 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
4632 // Replace the old instruction's uses with poison.
4633 replaceInstUsesWith(*WO, PoisonValue::get(WO->getType()));
4635 return BinaryOperator::Create(BinOp, LHS, RHS);
4636 }
4637
4638 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
4639
4640 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
4641 if (OvID == Intrinsic::usub_with_overflow)
4642 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
4643
4644 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
4645 // +1 is not possible because we assume signed values.
4646 if (OvID == Intrinsic::smul_with_overflow &&
4647 WO->getLHS()->getType()->isIntOrIntVectorTy(1))
4648 return BinaryOperator::CreateAnd(WO->getLHS(), WO->getRHS());
4649
4650 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
4651 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
4652 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
4653 // Only handle even bitwidths for performance reasons.
4654 if (BitWidth % 2 == 0)
4655 return new ICmpInst(
4656 ICmpInst::ICMP_UGT, WO->getLHS(),
4657 ConstantInt::get(WO->getLHS()->getType(),
4659 }
4660
4661 // If only the overflow result is used, and the right hand side is a
4662 // constant (or constant splat), we can remove the intrinsic by directly
4663 // checking for overflow.
4664 if (C) {
4665 // Compute the no-wrap range for LHS given RHS=C, then construct an
4666 // equivalent icmp, potentially using an offset.
4667 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
4668 WO->getBinaryOp(), *C, WO->getNoWrapKind());
4669
4670 CmpInst::Predicate Pred;
4671 APInt NewRHSC, Offset;
4672 NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
4673 auto *OpTy = WO->getRHS()->getType();
4674 auto *NewLHS = WO->getLHS();
4675 if (Offset != 0)
4676 NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
4677 return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
4678 ConstantInt::get(OpTy, NewRHSC));
4679 }
4680
4681 return nullptr;
4682}
4683
4686 InstCombiner::BuilderTy &Builder) {
4687 // Helper to fold frexp of select to select of frexp.
4688
4689 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4690 return nullptr;
4692 Value *TrueVal = SelectInst->getTrueValue();
4693 Value *FalseVal = SelectInst->getFalseValue();
4694
4695 const APFloat *ConstVal = nullptr;
4696 Value *VarOp = nullptr;
4697 bool ConstIsTrue = false;
4698
4699 if (match(TrueVal, m_APFloat(ConstVal))) {
4700 VarOp = FalseVal;
4701 ConstIsTrue = true;
4702 } else if (match(FalseVal, m_APFloat(ConstVal))) {
4703 VarOp = TrueVal;
4704 ConstIsTrue = false;
4705 } else {
4706 return nullptr;
4707 }
4708
4709 Builder.SetInsertPoint(&EV);
4710
4711 CallInst *NewFrexp =
4712 Builder.CreateCall(FrexpCall->getCalledFunction(), {VarOp}, "frexp");
4713 NewFrexp->copyIRFlags(FrexpCall);
4714
4715 Value *NewEV = Builder.CreateExtractValue(NewFrexp, 0, "mantissa");
4716
4717 int Exp;
4718 APFloat Mantissa = frexp(*ConstVal, Exp, APFloat::rmNearestTiesToEven);
4719
4720 Constant *ConstantMantissa = ConstantFP::get(TrueVal->getType(), Mantissa);
4721
4722 Value *NewSel = Builder.CreateSelectFMF(
4723 Cond, ConstIsTrue ? ConstantMantissa : NewEV,
4724 ConstIsTrue ? NewEV : ConstantMantissa, SelectInst, "select.frexp");
4725 return NewSel;
4726}
4728 Value *Agg = EV.getAggregateOperand();
4729
4730 if (!EV.hasIndices())
4731 return replaceInstUsesWith(EV, Agg);
4732
4733 if (Value *V = simplifyExtractValueInst(Agg, EV.getIndices(),
4734 SQ.getWithInstruction(&EV)))
4735 return replaceInstUsesWith(EV, V);
4736
4737 Value *Cond, *TrueVal, *FalseVal;
4739 m_Value(Cond), m_Value(TrueVal), m_Value(FalseVal)))))) {
4740 auto *SelInst =
4741 cast<SelectInst>(cast<IntrinsicInst>(Agg)->getArgOperand(0));
4742 if (Value *Result =
4743 foldFrexpOfSelect(EV, cast<IntrinsicInst>(Agg), SelInst, Builder))
4744 return replaceInstUsesWith(EV, Result);
4745 }
4747 // We're extracting from an insertvalue instruction, compare the indices
4748 const unsigned *exti, *exte, *insi, *inse;
4749 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
4750 exte = EV.idx_end(), inse = IV->idx_end();
4751 exti != exte && insi != inse;
4752 ++exti, ++insi) {
4753 if (*insi != *exti)
4754 // The insert and extract both reference distinctly different elements.
4755 // This means the extract is not influenced by the insert, and we can
4756 // replace the aggregate operand of the extract with the aggregate
4757 // operand of the insert. i.e., replace
4758 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4759 // %E = extractvalue { i32, { i32 } } %I, 0
4760 // with
4761 // %E = extractvalue { i32, { i32 } } %A, 0
4762 return ExtractValueInst::Create(IV->getAggregateOperand(),
4763 EV.getIndices());
4764 }
4765 if (exti == exte && insi == inse)
4766 // Both iterators are at the end: Index lists are identical. Replace
4767 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4768 // %C = extractvalue { i32, { i32 } } %B, 1, 0
4769 // with "i32 42"
4770 return replaceInstUsesWith(EV, IV->getInsertedValueOperand());
4771 if (exti == exte) {
4772 // The extract list is a prefix of the insert list. i.e. replace
4773 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4774 // %E = extractvalue { i32, { i32 } } %I, 1
4775 // with
4776 // %X = extractvalue { i32, { i32 } } %A, 1
4777 // %E = insertvalue { i32 } %X, i32 42, 0
4778 // by switching the order of the insert and extract (though the
4779 // insertvalue should be left in, since it may have other uses).
4780 Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(),
4781 EV.getIndices());
4782 return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
4783 ArrayRef(insi, inse));
4784 }
4785 if (insi == inse)
4786 // The insert list is a prefix of the extract list
4787 // We can simply remove the common indices from the extract and make it
4788 // operate on the inserted value instead of the insertvalue result.
4789 // i.e., replace
4790 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4791 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4792 // with
4793 // %E extractvalue { i32 } { i32 42 }, 0
4794 return ExtractValueInst::Create(IV->getInsertedValueOperand(),
4795 ArrayRef(exti, exte));
4796 }
4797
4798 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4799 return R;
4800
4801 if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
4802 // Bail out if the aggregate contains scalable vector type
4803 if (auto *STy = dyn_cast<StructType>(Agg->getType());
4804 STy && STy->isScalableTy())
4805 return nullptr;
4806
4807 // If the (non-volatile) load only has one use, we can rewrite this to a
4808 // load from a GEP. This reduces the size of the load. If a load is used
4809 // only by extractvalue instructions then this either must have been
4810 // optimized before, or it is a struct with padding, in which case we
4811 // don't want to do the transformation as it loses padding knowledge.
4812 if (L->isSimple() && L->hasOneUse()) {
4813 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4814 SmallVector<Value*, 4> Indices;
4815 // Prefix an i32 0 since we need the first element.
4816 Indices.push_back(Builder.getInt32(0));
4817 for (unsigned Idx : EV.indices())
4818 Indices.push_back(Builder.getInt32(Idx));
4819
4820 // We need to insert these at the location of the old load, not at that of
4821 // the extractvalue.
4822 Builder.SetInsertPoint(L);
4823 Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
4824 L->getPointerOperand(), Indices);
4825 Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
4826 // Whatever aliasing information we had for the orignal load must also
4827 // hold for the smaller load, so propagate the annotations.
4828 NL->setAAMetadata(L->getAAMetadata());
4829 // Returning the load directly will cause the main loop to insert it in
4830 // the wrong spot, so use replaceInstUsesWith().
4831 return replaceInstUsesWith(EV, NL);
4832 }
4833 }
4834
4835 if (auto *PN = dyn_cast<PHINode>(Agg))
4836 if (Instruction *Res = foldOpIntoPhi(EV, PN))
4837 return Res;
4838
4839 // Canonicalize extract (select Cond, TV, FV)
4840 // -> select cond, (extract TV), (extract FV)
4841 if (auto *SI = dyn_cast<SelectInst>(Agg))
4842 if (Instruction *R = FoldOpIntoSelect(EV, SI, /*FoldWithMultiUse=*/true))
4843 return R;
4844
4845 // We could simplify extracts from other values. Note that nested extracts may
4846 // already be simplified implicitly by the above: extract (extract (insert) )
4847 // will be translated into extract ( insert ( extract ) ) first and then just
4848 // the value inserted, if appropriate. Similarly for extracts from single-use
4849 // loads: extract (extract (load)) will be translated to extract (load (gep))
4850 // and if again single-use then via load (gep (gep)) to load (gep).
4851 // However, double extracts from e.g. function arguments or return values
4852 // aren't handled yet.
4853 return nullptr;
4854}
4855
4856/// Return 'true' if the given typeinfo will match anything.
4857static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4858 switch (Personality) {
4862 // The GCC C EH and Rust personality only exists to support cleanups, so
4863 // it's not clear what the semantics of catch clauses are.
4864 return false;
4866 return false;
4868 // While __gnat_all_others_value will match any Ada exception, it doesn't
4869 // match foreign exceptions (or didn't, before gcc-4.7).
4870 return false;
4881 return TypeInfo->isNullValue();
4882 }
4883 llvm_unreachable("invalid enum");
4884}
4885
4886static bool shorter_filter(const Value *LHS, const Value *RHS) {
4887 return
4888 cast<ArrayType>(LHS->getType())->getNumElements()
4889 <
4890 cast<ArrayType>(RHS->getType())->getNumElements();
4891}
4892
4894 // The logic here should be correct for any real-world personality function.
4895 // However if that turns out not to be true, the offending logic can always
4896 // be conditioned on the personality function, like the catch-all logic is.
4897 EHPersonality Personality =
4898 classifyEHPersonality(LI.getParent()->getParent()->getPersonalityFn());
4899
4900 // Simplify the list of clauses, eg by removing repeated catch clauses
4901 // (these are often created by inlining).
4902 bool MakeNewInstruction = false; // If true, recreate using the following:
4903 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4904 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4905
4906 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4907 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4908 bool isLastClause = i + 1 == e;
4909 if (LI.isCatch(i)) {
4910 // A catch clause.
4911 Constant *CatchClause = LI.getClause(i);
4912 Constant *TypeInfo = CatchClause->stripPointerCasts();
4913
4914 // If we already saw this clause, there is no point in having a second
4915 // copy of it.
4916 if (AlreadyCaught.insert(TypeInfo).second) {
4917 // This catch clause was not already seen.
4918 NewClauses.push_back(CatchClause);
4919 } else {
4920 // Repeated catch clause - drop the redundant copy.
4921 MakeNewInstruction = true;
4922 }
4923
4924 // If this is a catch-all then there is no point in keeping any following
4925 // clauses or marking the landingpad as having a cleanup.
4926 if (isCatchAll(Personality, TypeInfo)) {
4927 if (!isLastClause)
4928 MakeNewInstruction = true;
4929 CleanupFlag = false;
4930 break;
4931 }
4932 } else {
4933 // A filter clause. If any of the filter elements were already caught
4934 // then they can be dropped from the filter. It is tempting to try to
4935 // exploit the filter further by saying that any typeinfo that does not
4936 // occur in the filter can't be caught later (and thus can be dropped).
4937 // However this would be wrong, since typeinfos can match without being
4938 // equal (for example if one represents a C++ class, and the other some
4939 // class derived from it).
4940 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4941 Constant *FilterClause = LI.getClause(i);
4942 ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
4943 unsigned NumTypeInfos = FilterType->getNumElements();
4944
4945 // An empty filter catches everything, so there is no point in keeping any
4946 // following clauses or marking the landingpad as having a cleanup. By
4947 // dealing with this case here the following code is made a bit simpler.
4948 if (!NumTypeInfos) {
4949 NewClauses.push_back(FilterClause);
4950 if (!isLastClause)
4951 MakeNewInstruction = true;
4952 CleanupFlag = false;
4953 break;
4954 }
4955
4956 bool MakeNewFilter = false; // If true, make a new filter.
4957 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4958 if (isa<ConstantAggregateZero>(FilterClause)) {
4959 // Not an empty filter - it contains at least one null typeinfo.
4960 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4961 Constant *TypeInfo =
4963 // If this typeinfo is a catch-all then the filter can never match.
4964 if (isCatchAll(Personality, TypeInfo)) {
4965 // Throw the filter away.
4966 MakeNewInstruction = true;
4967 continue;
4968 }
4969
4970 // There is no point in having multiple copies of this typeinfo, so
4971 // discard all but the first copy if there is more than one.
4972 NewFilterElts.push_back(TypeInfo);
4973 if (NumTypeInfos > 1)
4974 MakeNewFilter = true;
4975 } else {
4976 ConstantArray *Filter = cast<ConstantArray>(FilterClause);
4977 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4978 NewFilterElts.reserve(NumTypeInfos);
4979
4980 // Remove any filter elements that were already caught or that already
4981 // occurred in the filter. While there, see if any of the elements are
4982 // catch-alls. If so, the filter can be discarded.
4983 bool SawCatchAll = false;
4984 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4985 Constant *Elt = Filter->getOperand(j);
4986 Constant *TypeInfo = Elt->stripPointerCasts();
4987 if (isCatchAll(Personality, TypeInfo)) {
4988 // This element is a catch-all. Bail out, noting this fact.
4989 SawCatchAll = true;
4990 break;
4991 }
4992
4993 // Even if we've seen a type in a catch clause, we don't want to
4994 // remove it from the filter. An unexpected type handler may be
4995 // set up for a call site which throws an exception of the same
4996 // type caught. In order for the exception thrown by the unexpected
4997 // handler to propagate correctly, the filter must be correctly
4998 // described for the call site.
4999 //
5000 // Example:
5001 //
5002 // void unexpected() { throw 1;}
5003 // void foo() throw (int) {
5004 // std::set_unexpected(unexpected);
5005 // try {
5006 // throw 2.0;
5007 // } catch (int i) {}
5008 // }
5009
5010 // There is no point in having multiple copies of the same typeinfo in
5011 // a filter, so only add it if we didn't already.
5012 if (SeenInFilter.insert(TypeInfo).second)
5013 NewFilterElts.push_back(cast<Constant>(Elt));
5014 }
5015 // A filter containing a catch-all cannot match anything by definition.
5016 if (SawCatchAll) {
5017 // Throw the filter away.
5018 MakeNewInstruction = true;
5019 continue;
5020 }
5021
5022 // If we dropped something from the filter, make a new one.
5023 if (NewFilterElts.size() < NumTypeInfos)
5024 MakeNewFilter = true;
5025 }
5026 if (MakeNewFilter) {
5027 FilterType = ArrayType::get(FilterType->getElementType(),
5028 NewFilterElts.size());
5029 FilterClause = ConstantArray::get(FilterType, NewFilterElts);
5030 MakeNewInstruction = true;
5031 }
5032
5033 NewClauses.push_back(FilterClause);
5034
5035 // If the new filter is empty then it will catch everything so there is
5036 // no point in keeping any following clauses or marking the landingpad
5037 // as having a cleanup. The case of the original filter being empty was
5038 // already handled above.
5039 if (MakeNewFilter && !NewFilterElts.size()) {
5040 assert(MakeNewInstruction && "New filter but not a new instruction!");
5041 CleanupFlag = false;
5042 break;
5043 }
5044 }
5045 }
5046
5047 // If several filters occur in a row then reorder them so that the shortest
5048 // filters come first (those with the smallest number of elements). This is
5049 // advantageous because shorter filters are more likely to match, speeding up
5050 // unwinding, but mostly because it increases the effectiveness of the other
5051 // filter optimizations below.
5052 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
5053 unsigned j;
5054 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
5055 for (j = i; j != e; ++j)
5056 if (!isa<ArrayType>(NewClauses[j]->getType()))
5057 break;
5058
5059 // Check whether the filters are already sorted by length. We need to know
5060 // if sorting them is actually going to do anything so that we only make a
5061 // new landingpad instruction if it does.
5062 for (unsigned k = i; k + 1 < j; ++k)
5063 if (shorter_filter(NewClauses[k+1], NewClauses[k])) {
5064 // Not sorted, so sort the filters now. Doing an unstable sort would be
5065 // correct too but reordering filters pointlessly might confuse users.
5066 std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j,
5068 MakeNewInstruction = true;
5069 break;
5070 }
5071
5072 // Look for the next batch of filters.
5073 i = j + 1;
5074 }
5075
5076 // If typeinfos matched if and only if equal, then the elements of a filter L
5077 // that occurs later than a filter F could be replaced by the intersection of
5078 // the elements of F and L. In reality two typeinfos can match without being
5079 // equal (for example if one represents a C++ class, and the other some class
5080 // derived from it) so it would be wrong to perform this transform in general.
5081 // However the transform is correct and useful if F is a subset of L. In that
5082 // case L can be replaced by F, and thus removed altogether since repeating a
5083 // filter is pointless. So here we look at all pairs of filters F and L where
5084 // L follows F in the list of clauses, and remove L if every element of F is
5085 // an element of L. This can occur when inlining C++ functions with exception
5086 // specifications.
5087 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
5088 // Examine each filter in turn.
5089 Value *Filter = NewClauses[i];
5090 ArrayType *FTy = dyn_cast<ArrayType>(Filter->getType());
5091 if (!FTy)
5092 // Not a filter - skip it.
5093 continue;
5094 unsigned FElts = FTy->getNumElements();
5095 // Examine each filter following this one. Doing this backwards means that
5096 // we don't have to worry about filters disappearing under us when removed.
5097 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
5098 Value *LFilter = NewClauses[j];
5099 ArrayType *LTy = dyn_cast<ArrayType>(LFilter->getType());
5100 if (!LTy)
5101 // Not a filter - skip it.
5102 continue;
5103 // If Filter is a subset of LFilter, i.e. every element of Filter is also
5104 // an element of LFilter, then discard LFilter.
5105 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
5106 // If Filter is empty then it is a subset of LFilter.
5107 if (!FElts) {
5108 // Discard LFilter.
5109 NewClauses.erase(J);
5110 MakeNewInstruction = true;
5111 // Move on to the next filter.
5112 continue;
5113 }
5114 unsigned LElts = LTy->getNumElements();
5115 // If Filter is longer than LFilter then it cannot be a subset of it.
5116 if (FElts > LElts)
5117 // Move on to the next filter.
5118 continue;
5119 // At this point we know that LFilter has at least one element.
5120 if (isa<ConstantAggregateZero>(LFilter)) { // LFilter only contains zeros.
5121 // Filter is a subset of LFilter iff Filter contains only zeros (as we
5122 // already know that Filter is not longer than LFilter).
5124 assert(FElts <= LElts && "Should have handled this case earlier!");
5125 // Discard LFilter.
5126 NewClauses.erase(J);
5127 MakeNewInstruction = true;
5128 }
5129 // Move on to the next filter.
5130 continue;
5131 }
5132 ConstantArray *LArray = cast<ConstantArray>(LFilter);
5133 if (isa<ConstantAggregateZero>(Filter)) { // Filter only contains zeros.
5134 // Since Filter is non-empty and contains only zeros, it is a subset of
5135 // LFilter iff LFilter contains a zero.
5136 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
5137 for (unsigned l = 0; l != LElts; ++l)
5138 if (LArray->getOperand(l)->isNullValue()) {
5139 // LFilter contains a zero - discard it.
5140 NewClauses.erase(J);
5141 MakeNewInstruction = true;
5142 break;
5143 }
5144 // Move on to the next filter.
5145 continue;
5146 }
5147 // At this point we know that both filters are ConstantArrays. Loop over
5148 // operands to see whether every element of Filter is also an element of
5149 // LFilter. Since filters tend to be short this is probably faster than
5150 // using a method that scales nicely.
5152 bool AllFound = true;
5153 for (unsigned f = 0; f != FElts; ++f) {
5154 Value *FTypeInfo = FArray->getOperand(f)->stripPointerCasts();
5155 AllFound = false;
5156 for (unsigned l = 0; l != LElts; ++l) {
5157 Value *LTypeInfo = LArray->getOperand(l)->stripPointerCasts();
5158 if (LTypeInfo == FTypeInfo) {
5159 AllFound = true;
5160 break;
5161 }
5162 }
5163 if (!AllFound)
5164 break;
5165 }
5166 if (AllFound) {
5167 // Discard LFilter.
5168 NewClauses.erase(J);
5169 MakeNewInstruction = true;
5170 }
5171 // Move on to the next filter.
5172 }
5173 }
5174
5175 // If we changed any of the clauses, replace the old landingpad instruction
5176 // with a new one.
5177 if (MakeNewInstruction) {
5179 NewClauses.size());
5180 for (Constant *C : NewClauses)
5181 NLI->addClause(C);
5182 // A landing pad with no clauses must have the cleanup flag set. It is
5183 // theoretically possible, though highly unlikely, that we eliminated all
5184 // clauses. If so, force the cleanup flag to true.
5185 if (NewClauses.empty())
5186 CleanupFlag = true;
5187 NLI->setCleanup(CleanupFlag);
5188 return NLI;
5189 }
5190
5191 // Even if none of the clauses changed, we may nonetheless have understood
5192 // that the cleanup flag is pointless. Clear it if so.
5193 if (LI.isCleanup() != CleanupFlag) {
5194 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
5195 LI.setCleanup(CleanupFlag);
5196 return &LI;
5197 }
5198
5199 return nullptr;
5200}
5201
5202Value *
5204 // Try to push freeze through instructions that propagate but don't produce
5205 // poison as far as possible. If an operand of freeze follows three
5206 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
5207 // guaranteed-non-poison operands then push the freeze through to the one
5208 // operand that is not guaranteed non-poison. The actual transform is as
5209 // follows.
5210 // Op1 = ... ; Op1 can be posion
5211 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only have
5212 // ; single guaranteed-non-poison operands
5213 // ... = Freeze(Op0)
5214 // =>
5215 // Op1 = ...
5216 // Op1.fr = Freeze(Op1)
5217 // ... = Inst(Op1.fr, NonPoisonOps...)
5218 auto *OrigOp = OrigFI.getOperand(0);
5219 auto *OrigOpInst = dyn_cast<Instruction>(OrigOp);
5220
5221 // While we could change the other users of OrigOp to use freeze(OrigOp), that
5222 // potentially reduces their optimization potential, so let's only do this iff
5223 // the OrigOp is only used by the freeze.
5224 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
5225 return nullptr;
5226
5227 // We can't push the freeze through an instruction which can itself create
5228 // poison. If the only source of new poison is flags, we can simply
5229 // strip them (since we know the only use is the freeze and nothing can
5230 // benefit from them.)
5232 /*ConsiderFlagsAndMetadata*/ false))
5233 return nullptr;
5234
5235 // If operand is guaranteed not to be poison, there is no need to add freeze
5236 // to the operand. So we first find the operand that is not guaranteed to be
5237 // poison.
5238 Value *MaybePoisonOperand = nullptr;
5239 for (Value *V : OrigOpInst->operands()) {
5241 // Treat identical operands as a single operand.
5242 (MaybePoisonOperand && MaybePoisonOperand == V))
5243 continue;
5244 if (!MaybePoisonOperand)
5245 MaybePoisonOperand = V;
5246 else
5247 return nullptr;
5248 }
5249
5250 OrigOpInst->dropPoisonGeneratingAnnotations();
5251
5252 // If all operands are guaranteed to be non-poison, we can drop freeze.
5253 if (!MaybePoisonOperand)
5254 return OrigOp;
5255
5256 Builder.SetInsertPoint(OrigOpInst);
5257 Value *FrozenMaybePoisonOperand = Builder.CreateFreeze(
5258 MaybePoisonOperand, MaybePoisonOperand->getName() + ".fr");
5259
5260 OrigOpInst->replaceUsesOfWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
5261 return OrigOp;
5262}
5263
5265 PHINode *PN) {
5266 // Detect whether this is a recurrence with a start value and some number of
5267 // backedge values. We'll check whether we can push the freeze through the
5268 // backedge values (possibly dropping poison flags along the way) until we
5269 // reach the phi again. In that case, we can move the freeze to the start
5270 // value.
5271 Use *StartU = nullptr;
5273 for (Use &U : PN->incoming_values()) {
5274 if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
5275 // Add backedge value to worklist.
5276 Worklist.push_back(U.get());
5277 continue;
5278 }
5279
5280 // Don't bother handling multiple start values.
5281 if (StartU)
5282 return nullptr;
5283 StartU = &U;
5284 }
5285
5286 if (!StartU || Worklist.empty())
5287 return nullptr; // Not a recurrence.
5288
5289 Value *StartV = StartU->get();
5290 BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
5291 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
5292 // We can't insert freeze if the start value is the result of the
5293 // terminator (e.g. an invoke).
5294 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5295 return nullptr;
5296
5299 while (!Worklist.empty()) {
5300 Value *V = Worklist.pop_back_val();
5301 if (!Visited.insert(V).second)
5302 continue;
5303
5304 if (Visited.size() > 32)
5305 return nullptr; // Limit the total number of values we inspect.
5306
5307 // Assume that PN is non-poison, because it will be after the transform.
5308 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5309 continue;
5310
5313 /*ConsiderFlagsAndMetadata*/ false))
5314 return nullptr;
5315
5316 DropFlags.push_back(I);
5317 append_range(Worklist, I->operands());
5318 }
5319
5320 for (Instruction *I : DropFlags)
5321 I->dropPoisonGeneratingAnnotations();
5322
5323 if (StartNeedsFreeze) {
5324 Builder.SetInsertPoint(StartBB->getTerminator());
5325 Value *FrozenStartV = Builder.CreateFreeze(StartV,
5326 StartV->getName() + ".fr");
5327 replaceUse(*StartU, FrozenStartV);
5328 }
5329 return replaceInstUsesWith(FI, PN);
5330}
5331
5333 Value *Op = FI.getOperand(0);
5334
5335 if (isa<Constant>(Op) || Op->hasOneUse())
5336 return false;
5337
5338 // Move the freeze directly after the definition of its operand, so that
5339 // it dominates the maximum number of uses. Note that it may not dominate
5340 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5341 // the normal/default destination. This is why the domination check in the
5342 // replacement below is still necessary.
5343 BasicBlock::iterator MoveBefore;
5344 if (isa<Argument>(Op)) {
5345 MoveBefore =
5347 } else {
5348 auto MoveBeforeOpt = cast<Instruction>(Op)->getInsertionPointAfterDef();
5349 if (!MoveBeforeOpt)
5350 return false;
5351 MoveBefore = *MoveBeforeOpt;
5352 }
5353
5354 // Re-point iterator to come after any debug-info records.
5355 MoveBefore.setHeadBit(false);
5356
5357 bool Changed = false;
5358 if (&FI != &*MoveBefore) {
5359 FI.moveBefore(*MoveBefore->getParent(), MoveBefore);
5360 Changed = true;
5361 }
5362
5363 Changed |= Op->replaceUsesWithIf(
5364 &FI, [&](Use &U) -> bool { return DT.dominates(&FI, U); });
5365
5366 return Changed;
5367}
5368
5369// Check if any direct or bitcast user of this value is a shuffle instruction.
5371 for (auto *U : V->users()) {
5373 return true;
5374 else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
5375 return true;
5376 }
5377 return false;
5378}
5379
// InstCombinerImpl::visitFreeze (signature at original line 5380 elided in
// this view): the InstCombine visitor for freeze instructions. Tries, in
// order: InstSimplify, folding into phis/recurrences, collapsing
// freeze(undef) to a use-driven constant, freezing only the undef/poison
// lanes of a vector constant, and finally rerouting other uses of the
// operand through the freeze.
5381 Value *Op0 = I.getOperand(0);
5382
5383 if (Value *V = simplifyFreezeInst(Op0, SQ.getWithInstruction(&I)))
5384 return replaceInstUsesWith(I, V);
5385
5386 // freeze (phi const, x) --> phi const, (freeze x)
5387 if (auto *PN = dyn_cast<PHINode>(Op0)) {
5388 if (Instruction *NV = foldOpIntoPhi(I, PN))
5389 return NV;
5390 if (Instruction *NV = foldFreezeIntoRecurrence(I, PN))
5391 return NV;
5392 }
5393
// (original line 5394 elided — presumably the guard binding NI, e.g. pushing
// the freeze through a one-use operand; TODO confirm against upstream.)
5395 return replaceInstUsesWith(I, NI);
5396
5397 // If I is freeze(undef), check its uses and fold it to a fixed constant.
5398 // - or: pick -1
5399 // - select's condition: if the true value is constant, choose it by making
5400 // the condition true.
5401 // - phi: pick the common constant across operands
5402 // - default: pick 0
5403 //
5404 // Note that this transform is intentionally done here rather than
5405 // via an analysis in InstSimplify or at individual user sites. That is
5406 // because we must produce the same value for all uses of the freeze -
5407 // it's the reason "freeze" exists!
5408 //
5409 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
5410 // duplicating logic for binops at least.
5411 auto getUndefReplacement = [&](Type *Ty) {
5412 auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
5413 // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
5414 // removed.
5415 Constant *BestValue = nullptr;
5416 for (Value *V : PN.incoming_values()) {
5417 if (match(V, m_Freeze(m_Undef())))
5418 continue;
5419
// (original line 5420 elided — presumably binds C from V, e.g. a
// dyn_cast<Constant>; TODO confirm against upstream.)
5421 if (!C)
5422 return nullptr;
5423
// (original line 5424 elided — presumably rejects constants that may still be
// undef/poison; TODO confirm against upstream.)
5425 return nullptr;
5426
// All non-freeze(undef) incoming values must agree on one constant.
5427 if (BestValue && BestValue != C)
5428 return nullptr;
5429
5430 BestValue = C;
5431 }
5432 return BestValue;
5433 };
5434
5435 Value *NullValue = Constant::getNullValue(Ty);
5436 Value *BestValue = nullptr;
5437 for (auto *U : I.users()) {
// Default per-user preference is zero; specific users below override it.
5438 Value *V = NullValue;
5439 if (match(U, m_Or(m_Value(), m_Value())))
// (original line 5440 elided — per the comment above, an 'or' user prefers
// all-ones (-1); TODO confirm against upstream.)
5441 else if (match(U, m_Select(m_Specific(&I), m_Constant(), m_Value())))
5442 V = ConstantInt::getTrue(Ty);
5443 else if (match(U, m_c_Select(m_Specific(&I), m_Value(V)))) {
5444 if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, &AC, &I, &DT))
5445 V = NullValue;
5446 } else if (auto *PHI = dyn_cast<PHINode>(U)) {
5447 if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
5448 V = MaybeV;
5449 }
5450
// All users must receive the same replacement; disagreement falls back to 0.
5451 if (!BestValue)
5452 BestValue = V;
5453 else if (BestValue != V)
5454 BestValue = NullValue;
5455 }
5456 assert(BestValue && "Must have at least one use");
5457 assert(BestValue != &I && "Cannot replace with itself");
5458 return BestValue;
5459 };
5460
5461 if (match(Op0, m_Undef())) {
5462 // Don't fold freeze(undef/poison) if it's used as a vector operand in
5463 // a shuffle. This may improve codegen for shuffles that allow
5464 // unspecified inputs.
// (original line 5465 elided — presumably `if (isUsedWithinShuffleVector(&I))`
// given the comment above; TODO confirm against upstream.)
5466 return nullptr;
5467 return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
5468 }
5469
// For a fixed vector constant with undef/poison elements, fill those lanes
// with the first defined element (or zero if none), freezing the constant.
5470 auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
5471 Type *Ty = C->getType();
5472 auto *VTy = dyn_cast<FixedVectorType>(Ty);
5473 if (!VTy)
5474 return nullptr;
5475 unsigned NumElts = VTy->getNumElements();
5476 Constant *BestValue = Constant::getNullValue(VTy->getScalarType());
5477 for (unsigned i = 0; i != NumElts; ++i) {
5478 Constant *EltC = C->getAggregateElement(i);
5479 if (EltC && !match(EltC, m_Undef())) {
5480 BestValue = EltC;
5481 break;
5482 }
5483 }
5484 return Constant::replaceUndefsWith(C, BestValue);
5485 };
5486
5487 Constant *C;
5488 if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement() &&
5489 !C->containsConstantExpression()) {
5490 if (Constant *Repl = getFreezeVectorReplacement(C))
5491 return replaceInstUsesWith(I, Repl);
5492 }
5493
5494 // Replace uses of Op with freeze(Op).
5495 if (freezeOtherUses(I))
5496 return &I;
5497
5498 return nullptr;
5499}
5500
5501/// Check for case where the call writes to an otherwise dead alloca. This
5502/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5503/// helper *only* analyzes the write; doesn't check any other legality aspect.
///
/// (Signature at original line 5504 elided in this view; takes the candidate
/// instruction and a TargetLibraryInfo — TODO confirm against upstream.)
/// Returns true only when I is a call whose sole write destination is an
/// alloca whose only users are the call itself and address computations.
5505 auto *CB = dyn_cast<CallBase>(I);
5506 if (!CB)
5507 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5508 // to allow reload along used path as described below. Otherwise, this
5509 // is simply a store to a dead allocation which will be removed.
5510 return false;
5511 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CB, TLI);
5512 if (!Dest)
5513 return false;
5514 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Dest->Ptr));
5515 if (!AI)
5516 // TODO: allow malloc?
5517 return false;
5518 // TODO: allow memory access dominated by move point? Note that since AI
5519 // could have a reference to itself captured by the call, we would need to
5520 // account for cycles in doing so.
5521 SmallVector<const User *> AllocaUsers;
// (original line 5522 elided — presumably the `Visited` set declaration used
// by pushUsers below; TODO confirm against upstream.)
5523 auto pushUsers = [&](const Instruction &I) {
5524 for (const User *U : I.users()) {
5525 if (Visited.insert(U).second)
5526 AllocaUsers.push_back(U);
5527 }
5528 };
5529 pushUsers(*AI);
// Worklist walk over the alloca's transitive users: only address computations
// (GEP/addrspacecast) and the call itself are permitted.
5530 while (!AllocaUsers.empty()) {
5531 auto *UserI = cast<Instruction>(AllocaUsers.pop_back_val());
5532 if (isa<GetElementPtrInst>(UserI) || isa<AddrSpaceCastInst>(UserI)) {
5533 pushUsers(*UserI);
5534 continue;
5535 }
5536 if (UserI == CB)
5537 continue;
5538 // TODO: support lifetime.start/end here
5539 return false;
5540 }
5541 return true;
5542}
5543
5544/// Try to move the specified instruction from its current block into the
5545/// beginning of DestBlock, which can only happen if it's safe to move the
5546/// instruction past all of the instructions between it and the end of its
5547/// block.
///
/// (First line of the signature, original line 5548, is elided in this view.)
/// Returns true if the instruction was actually moved; also sinks or salvages
/// the debug records that refer to it.
5549 BasicBlock *DestBlock) {
5550 BasicBlock *SrcBlock = I->getParent();
5551
5552 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
5553 if (isa<PHINode>(I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
5554 I->isTerminator())
5555 return false;
5556
5557 // Do not sink static or dynamic alloca instructions. Static allocas must
5558 // remain in the entry block, and dynamic allocas must not be sunk in between
5559 // a stacksave / stackrestore pair, which would incorrectly shorten its
5560 // lifetime.
5561 if (isa<AllocaInst>(I))
5562 return false;
5563
5564 // Do not sink into catchswitch blocks.
5565 if (isa<CatchSwitchInst>(DestBlock->getTerminator()))
5566 return false;
5567
5568 // Do not sink convergent call instructions.
5569 if (auto *CI = dyn_cast<CallInst>(I)) {
5570 if (CI->isConvergent())
5571 return false;
5572 }
5573
5574 // Unless we can prove that the memory write isn't visible except on the
5575 // path we're sinking to, we must bail.
5576 if (I->mayWriteToMemory()) {
5577 if (!SoleWriteToDeadLocal(I, TLI))
5578 return false;
5579 }
5580
5581 // We can only sink load instructions if there is nothing between the load and
5582 // the end of block that could change the value.
5583 if (I->mayReadFromMemory() &&
5584 !I->hasMetadata(LLVMContext::MD_invariant_load)) {
5585 // We don't want to do any sophisticated alias analysis, so we only check
5586 // the instructions after I in I's parent block if we try to sink to its
5587 // successor block.
5588 if (DestBlock->getUniquePredecessor() != I->getParent())
5589 return false;
5590 for (BasicBlock::iterator Scan = std::next(I->getIterator()),
5591 E = I->getParent()->end();
5592 Scan != E; ++Scan)
5593 if (Scan->mayWriteToMemory())
5594 return false;
5595 }
5596
// Drop droppable uses (e.g. from llvm.assume) that live outside DestBlock,
// re-queuing their users so they get revisited.
5597 I->dropDroppableUses([&](const Use *U) {
5598 auto *I = dyn_cast<Instruction>(U->getUser());
5599 if (I && I->getParent() != DestBlock) {
5600 Worklist.add(I);
5601 return true;
5602 }
5603 return false;
5604 });
5605 /// FIXME: We could remove droppable uses that are not dominated by
5606 /// the new position.
5607
5608 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
5609 I->moveBefore(*DestBlock, InsertPos);
5610 ++NumSunkInst;
5611
5612 // Also sink all related debug uses from the source basic block. Otherwise we
5613 // get debug use before the def. Attempt to salvage debug uses first, to
5614 // maximise the range variables have location for. If we cannot salvage, then
5615 // mark the location undef: we know it was supposed to receive a new location
5616 // here, but that computation has been sunk.
5617 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
5618 findDbgUsers(I, DbgVariableRecords);
5619 if (!DbgVariableRecords.empty())
5620 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
5621 DbgVariableRecords);
5622
5623 // PS: there are numerous flaws with this behaviour, not least that right now
5624 // assignments can be re-ordered past other assignments to the same variable
5625 // if they use different Values. Creating more undef assignments can never be
5626 // undone. And salvaging all users outside of this block can un-necessarily
5627 // alter the lifetime of the live-value that the variable refers to.
5628 // Some of these things can be resolved by tolerating debug use-before-defs in
5629 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
5630 // being used for more architectures.
5631
5632 return true;
5633}
5634
// InstCombinerImpl::tryToSinkInstructionDbgVariableRecords (first signature
// line, original 5635, is elided in this view): after I has been sunk from
// SrcBlock to InsertPos in DestBlock, clone the debug records that referred
// to I into the destination (preserving last-assignment-wins semantics) and
// salvage the remaining ones.
5636 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
5637 BasicBlock *DestBlock,
5638 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
5639 // For all debug values in the destination block, the sunk instruction
5640 // will still be available, so they do not need to be dropped.
5641
5642 // Fetch all DbgVariableRecords not already in the destination.
5643 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
5644 for (auto &DVR : DbgVariableRecords)
5645 if (DVR->getParent() != DestBlock)
5646 DbgVariableRecordsToSalvage.push_back(DVR);
5647
5648 // Fetch a second collection, of DbgVariableRecords in the source block that
5649 // we're going to sink.
5650 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
5651 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
5652 if (DVR->getParent() == SrcBlock)
5653 DbgVariableRecordsToSink.push_back(DVR);
5654
5655 // Sort DbgVariableRecords according to their position in the block. This is a
5656 // partial order: DbgVariableRecords attached to different instructions will
5657 // be ordered by the instruction order, but DbgVariableRecords attached to the
5658 // same instruction won't have an order.
5659 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
5660 return B->getInstruction()->comesBefore(A->getInstruction());
5661 };
5662 llvm::stable_sort(DbgVariableRecordsToSink, Order);
5663
5664 // If there are two assignments to the same variable attached to the same
5665 // instruction, the ordering between the two assignments is important. Scan
5666 // for this (rare) case and establish which is the last assignment.
5667 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
// (original line 5668 elided — presumably the FilterOutMap declaration keyed
// by InstVarPair; TODO confirm against upstream.)
5669 if (DbgVariableRecordsToSink.size() > 1) {
// (original line 5670 elided — presumably the CountMap declaration used
// below; TODO confirm against upstream.)
5671 // Count how many assignments to each variable there is per instruction.
5672 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5673 DebugVariable DbgUserVariable =
5674 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5675 DVR->getDebugLoc()->getInlinedAt());
5676 CountMap[std::make_pair(DVR->getInstruction(), DbgUserVariable)] += 1;
5677 }
5678
5679 // If there are any instructions with two assignments, add them to the
5680 // FilterOutMap to record that they need extra filtering.
// (original line 5681 elided — presumably the DupSet declaration; TODO
// confirm against upstream.)
5682 for (auto It : CountMap) {
5683 if (It.second > 1) {
5684 FilterOutMap[It.first] = nullptr;
5685 DupSet.insert(It.first.first);
5686 }
5687 }
5688
5689 // For all instruction/variable pairs needing extra filtering, find the
5690 // latest assignment.
5691 for (const Instruction *Inst : DupSet) {
5692 for (DbgVariableRecord &DVR :
5693 llvm::reverse(filterDbgVars(Inst->getDbgRecordRange()))) {
5694 DebugVariable DbgUserVariable =
5695 DebugVariable(DVR.getVariable(), DVR.getExpression(),
5696 DVR.getDebugLoc()->getInlinedAt());
5697 auto FilterIt =
5698 FilterOutMap.find(std::make_pair(Inst, DbgUserVariable));
5699 if (FilterIt == FilterOutMap.end())
5700 continue;
// First hit in the reversed record range is the latest assignment; keep it.
5701 if (FilterIt->second != nullptr)
5702 continue;
5703 FilterIt->second = &DVR;
5704 }
5705 }
5706 }
5707
5708 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
5709 // out any duplicate assignments identified above.
// (original line 5710 elided — presumably the DVRClones vector declaration;
// TODO confirm against upstream.)
5711 SmallSet<DebugVariable, 4> SunkVariables;
5712 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
// (original line 5713 elided — presumably a guard skipping records that must
// not be cloned; TODO confirm against upstream.)
5714 continue;
5715
5716 DebugVariable DbgUserVariable =
5717 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5718 DVR->getDebugLoc()->getInlinedAt());
5719
5720 // For any variable where there were multiple assignments in the same place,
5721 // ignore all but the last assignment.
5722 if (!FilterOutMap.empty()) {
5723 InstVarPair IVP = std::make_pair(DVR->getInstruction(), DbgUserVariable);
5724 auto It = FilterOutMap.find(IVP);
5725
5726 // Filter out.
5727 if (It != FilterOutMap.end() && It->second != DVR)
5728 continue;
5729 }
5730
// Only one clone per variable: records are walked latest-first.
5731 if (!SunkVariables.insert(DbgUserVariable).second)
5732 continue;
5733
// dbg_assign records are tied to their store and are not sunk.
5734 if (DVR->isDbgAssign())
5735 continue;
5736
5737 DVRClones.emplace_back(DVR->clone());
5738 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
5739 }
5740
5741 // Perform salvaging without the clones, then sink the clones.
5742 if (DVRClones.empty())
5743 return;
5744
5745 salvageDebugInfoForDbgValues(*I, DbgVariableRecordsToSalvage);
5746
5747 // The clones are in reverse order of original appearance. Assert that the
5748 // head bit is set on the iterator as we _should_ have received it via
5749 // getFirstInsertionPt. Inserting like this will reverse the clone order as
5750 // we'll repeatedly insert at the head, such as:
5751 // DVR-3 (third insertion goes here)
5752 // DVR-2 (second insertion goes here)
5753 // DVR-1 (first insertion goes here)
5754 // Any-Prior-DVRs
5755 // InsertPtInst
5756 assert(InsertPos.getHeadBit());
5757 for (DbgVariableRecord *DVRClone : DVRClones) {
5758 InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
5759 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5760 }
5761}
5762
// InstCombinerImpl::run (signature at original line 5763 elided in this
// view): the main combine driver. Pops instructions off the worklist,
// DCEs trivially dead ones, opportunistically sinks single-block-use
// instructions, then dispatches to the visit* folds. Returns true if the IR
// was changed.
5764 while (!Worklist.isEmpty()) {
5765 // Walk deferred instructions in reverse order, and push them to the
5766 // worklist, which means they'll end up popped from the worklist in-order.
5767 while (Instruction *I = Worklist.popDeferred()) {
5768 // Check to see if we can DCE the instruction. We do this already here to
5769 // reduce the number of uses and thus allow other folds to trigger.
5770 // Note that eraseInstFromFunction() may push additional instructions on
5771 // the deferred worklist, so this will DCE whole instruction chains.
// (original lines 5772-5773 elided — presumably the triviality check plus
// eraseInstFromFunction call; TODO confirm against upstream.)
5774 ++NumDeadInst;
5775 continue;
5776 }
5777
5778 Worklist.push(I);
5779 }
5780
5781 Instruction *I = Worklist.removeOne();
5782 if (I == nullptr) continue; // skip null values.
5783
5784 // Check to see if we can DCE the instruction.
// (original lines 5785-5786 elided — presumably the same dead-instruction
// check and erase as above; TODO confirm against upstream.)
5787 ++NumDeadInst;
5788 continue;
5789 }
5790
5791 if (!DebugCounter::shouldExecute(VisitCounter))
5792 continue;
5793
5794 // See if we can trivially sink this instruction to its user if we can
5795 // prove that the successor is not executed more frequently than our block.
5796 // Return the UserBlock if successful.
5797 auto getOptionalSinkBlockForInst =
5798 [this](Instruction *I) -> std::optional<BasicBlock *> {
5799 if (!EnableCodeSinking)
5800 return std::nullopt;
5801
5802 BasicBlock *BB = I->getParent();
5803 BasicBlock *UserParent = nullptr;
5804 unsigned NumUsers = 0;
5805
5806 for (Use &U : I->uses()) {
5807 User *User = U.getUser();
5808 if (User->isDroppable()) {
5809 // Do not sink if there are dereferenceable assumes that would be
5810 // removed.
// (original line 5811 elided — presumably binds II from the droppable user,
// e.g. a dyn_cast to the intrinsic; TODO confirm against upstream.)
5812 if (II->getIntrinsicID() != Intrinsic::assume ||
5813 !II->getOperandBundle("dereferenceable"))
5814 continue;
5815 }
5816
5817 if (NumUsers > MaxSinkNumUsers)
5818 return std::nullopt;
5819
5820 Instruction *UserInst = cast<Instruction>(User);
5821 // Special handling for Phi nodes - get the block the use occurs in.
5822 BasicBlock *UserBB = UserInst->getParent();
5823 if (PHINode *PN = dyn_cast<PHINode>(UserInst))
5824 UserBB = PN->getIncomingBlock(U);
5825 // Bail out if we have uses in different blocks. We don't do any
5826 // sophisticated analysis (i.e. finding NearestCommonDominator of these
5827 // use blocks).
5828 if (UserParent && UserParent != UserBB)
5829 return std::nullopt;
5830 UserParent = UserBB;
5831
5832 // Make sure these checks are done only once, naturally we do the checks
5833 // the first time we get the userparent, this will save compile time.
5834 if (NumUsers == 0) {
5835 // Try sinking to another block. If that block is unreachable, then do
5836 // not bother. SimplifyCFG should handle it.
5837 if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5838 return std::nullopt;
5839
5840 auto *Term = UserParent->getTerminator();
5841 // See if the user is one of our successors that has only one
5842 // predecessor, so that we don't have to split the critical edge.
5843 // Another option where we can sink is a block that ends with a
5844 // terminator that does not pass control to other block (such as
5845 // return or unreachable or resume). In this case:
5846 // - I dominates the User (by SSA form);
5847 // - the User will be executed at most once.
5848 // So sinking I down to User is always profitable or neutral.
5849 if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5850 return std::nullopt;
5851
5852 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5853 }
5854
5855 NumUsers++;
5856 }
5857
5858 // No user or only has droppable users.
5859 if (!UserParent)
5860 return std::nullopt;
5861
5862 return UserParent;
5863 };
5864
5865 auto OptBB = getOptionalSinkBlockForInst(I);
5866 if (OptBB) {
5867 auto *UserParent = *OptBB;
5868 // Okay, the CFG is simple enough, try to sink this instruction.
5869 if (tryToSinkInstruction(I, UserParent)) {
5870 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5871 MadeIRChange = true;
5872 // We'll add uses of the sunk instruction below, but since
5873 // sinking can expose opportunities for its *operands* add
5874 // them to the worklist
5875 for (Use &U : I->operands())
5876 if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
5877 Worklist.push(OpI);
5878 }
5879 }
5880
5881 // Now that we have an instruction, try combining it to simplify it.
5882 Builder.SetInsertPoint(I);
5883 Builder.CollectMetadataToCopy(
5884 I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5885
5886#ifndef NDEBUG
5887 std::string OrigI;
5888#endif
5889 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5890 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5891
5892 if (Instruction *Result = visit(*I)) {
5893 ++NumCombined;
5894 // Should we replace the old instruction with a new one?
5895 if (Result != I) {
5896 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5897 << " New = " << *Result << '\n');
5898
5899 // We copy the old instruction's DebugLoc to the new instruction, unless
5900 // InstCombine already assigned a DebugLoc to it, in which case we
5901 // should trust the more specifically selected DebugLoc.
5902 Result->setDebugLoc(Result->getDebugLoc().orElse(I->getDebugLoc()));
5903 // We also copy annotation metadata to the new instruction.
5904 Result->copyMetadata(*I, LLVMContext::MD_annotation);
5905 // Everything uses the new instruction now.
5906 I->replaceAllUsesWith(Result);
5907
5908 // Move the name to the new instruction first.
5909 Result->takeName(I);
5910
5911 // Insert the new instruction into the basic block...
5912 BasicBlock *InstParent = I->getParent();
5913 BasicBlock::iterator InsertPos = I->getIterator();
5914
5915 // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5916 if (isa<PHINode>(Result) != isa<PHINode>(I)) {
5917 // We need to fix up the insertion point.
5918 if (isa<PHINode>(I)) // PHI -> Non-PHI
5919 InsertPos = InstParent->getFirstInsertionPt();
5920 else // Non-PHI -> PHI
5921 InsertPos = InstParent->getFirstNonPHIIt();
5922 }
5923
5924 Result->insertInto(InstParent, InsertPos);
5925
5926 // Push the new instruction and any users onto the worklist.
5927 Worklist.pushUsersToWorkList(*Result);
5928 Worklist.push(Result);
5929
// (original line 5930 elided — presumably erases the now-replaced original
// instruction; TODO confirm against upstream.)
5931 } else {
5932 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5933 << " New = " << *I << '\n');
5934
5935 // If the instruction was modified, it's possible that it is now dead.
5936 // if so, remove it.
// (original lines 5937-5938 elided — presumably the dead-check plus
// eraseInstFromFunction; TODO confirm against upstream.)
5939 } else {
5940 Worklist.pushUsersToWorkList(*I);
5941 Worklist.push(I);
5942 }
5943 }
5944 MadeIRChange = true;
5945 }
5946 }
5947
5948 Worklist.zap();
5949 return MadeIRChange;
5950}
5951
5952// Track the scopes used by !alias.scope and !noalias. In a function, a
5953// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5954// by both sets. If not, the declaration of the scope can be safely omitted.
5955// The MDNode of the scope can be omitted as well for the instructions that are
5956// part of this function. We do not do that at this point, as this might become
5957// too time consuming to do.
// (class header, original line 5958, elided in this view)
5959 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5960 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5961
5962public:
// analyse(I): record every scope list and individual scope referenced by I's
// !alias.scope / !noalias metadata. (Signature, original line 5963, elided.)
5964 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5965 if (!I->hasMetadataOtherThanDebugLoc())
5966 return;
5967
// Insert the list node itself, then each scope it contains; the insert's
// second result doubles as a "seen this list already" fast path.
5968 auto Track = [](Metadata *ScopeList, auto &Container) {
5969 const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5970 if (!MDScopeList || !Container.insert(MDScopeList).second)
5971 return;
5972 for (const auto &MDOperand : MDScopeList->operands())
5973 if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5974 Container.insert(MDScope);
5975 };
5976
5977 Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5978 Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5979 }
5980
// isNoAliasScopeDeclDead(Inst): true if Inst is a noalias.scope.decl whose
// scope is not used by BOTH metadata kinds, i.e. the declaration is dead.
// (Signature and the binding of Decl, original lines 5981-5982, are elided.)
5983 if (!Decl)
5984 return false;
5985
5986 assert(Decl->use_empty() &&
5987 "llvm.experimental.noalias.scope.decl in use ?");
5988 const MDNode *MDSL = Decl->getScopeList();
5989 assert(MDSL->getNumOperands() == 1 &&
5990 "llvm.experimental.noalias.scope should refer to a single scope");
5991 auto &MDOperand = MDSL->getOperand(0);
5992 if (auto *MD = dyn_cast<MDNode>(MDOperand))
5993 return !UsedAliasScopesAndLists.contains(MD) ||
5994 !UsedNoAliasScopesAndLists.contains(MD);
5995
5996 // Not an MDNode ? throw away.
5997 return true;
5998 }
5999};
6000
6001/// Populate the IC worklist from a function, by walking it in reverse
6002/// post-order and adding all reachable code to the worklist.
6003///
6004/// This has a couple of tricks to make the code faster and more powerful. In
6005/// particular, we constant fold and DCE instructions as we go, to avoid adding
6006/// them to the worklist (this significantly speeds up instcombine on code where
6007/// many instructions are dead or constant). Additionally, if we find a branch
6008/// whose condition is a known constant, we only visit the reachable successors.
///
/// (Signature, original line 6009, elided in this view.) Returns true if the
/// IR was changed during the walk.
6010 bool MadeIRChange = false;
// (original line 6011 elided — presumably the LiveBlocks set declaration used
// below; TODO confirm against upstream.)
6012 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
6013 DenseMap<Constant *, Constant *> FoldedConstants;
6014 AliasScopeTracker SeenAliasScopes;
6015
// For each successor edge of BB other than the known-live one, record the
// edge as dead and poison the corresponding phi inputs in the successor.
6016 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
6017 for (BasicBlock *Succ : successors(BB))
6018 if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
6019 for (PHINode &PN : Succ->phis())
6020 for (Use &U : PN.incoming_values())
6021 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
6022 U.set(PoisonValue::get(PN.getType()));
6023 MadeIRChange = true;
6024 }
6025 };
6026
6027 for (BasicBlock *BB : RPOT) {
// A non-entry block all of whose predecessors are dead (or dominated by the
// block itself, i.e. only reachable through it) is itself dead.
6028 if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
6029 return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
6030 })) {
6031 HandleOnlyLiveSuccessor(BB, nullptr);
6032 continue;
6033 }
6034 LiveBlocks.insert(BB);
6035
6036 for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
6037 // ConstantProp instruction if trivially constant.
6038 if (!Inst.use_empty() &&
6039 (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
6040 if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
6041 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
6042 << '\n');
6043 Inst.replaceAllUsesWith(C);
6044 ++NumConstProp;
6045 if (isInstructionTriviallyDead(&Inst, &TLI))
6046 Inst.eraseFromParent();
6047 MadeIRChange = true;
6048 continue;
6049 }
6050
6051 // See if we can constant fold its operands.
6052 for (Use &U : Inst.operands()) {
// (original line 6053 elided — presumably filters out operands that are not
// foldable constants; TODO confirm against upstream.)
6054 continue;
6055
6056 auto *C = cast<Constant>(U);
// Memoize folds so the same constant is not re-folded per use.
6057 Constant *&FoldRes = FoldedConstants[C];
6058 if (!FoldRes)
6059 FoldRes = ConstantFoldConstant(C, DL, &TLI);
6060
6061 if (FoldRes != C) {
6062 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
6063 << "\n Old = " << *C
6064 << "\n New = " << *FoldRes << '\n');
6065 U = FoldRes;
6066 MadeIRChange = true;
6067 }
6068 }
6069
6070 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
6071 // these call instructions consumes non-trivial amount of time and
6072 // provides no value for the optimization.
6073 if (!Inst.isDebugOrPseudoInst()) {
6074 InstrsForInstructionWorklist.push_back(&Inst);
6075 SeenAliasScopes.analyse(&Inst);
6076 }
6077 }
6078
6079 // If this is a branch or switch on a constant, mark only the single
6080 // live successor. Otherwise assume all successors are live.
6081 Instruction *TI = BB->getTerminator();
// NOTE(review): "CondBrInst" is not a type in llvm/IR; upstream performs this
// check via dyn_cast<BranchInst> guarded by isConditional(). Confirm this
// name against the tree.
6082 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
6083 if (isa<UndefValue>(BI->getCondition())) {
6084 // Branch on undef is UB.
6085 HandleOnlyLiveSuccessor(BB, nullptr);
6086 continue;
6087 }
6088 if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
// Successor 0 is the taken ("true") edge, hence the negation.
6089 bool CondVal = Cond->getZExtValue();
6090 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
6091 continue;
6092 }
6093 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
6094 if (isa<UndefValue>(SI->getCondition())) {
6095 // Switch on undef is UB.
6096 HandleOnlyLiveSuccessor(BB, nullptr);
6097 continue;
6098 }
6099 if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
6100 HandleOnlyLiveSuccessor(BB,
6101 SI->findCaseValue(Cond)->getCaseSuccessor());
6102 continue;
6103 }
6104 }
6105 }
6106
6107 // Remove instructions inside unreachable blocks. This prevents the
6108 // instcombine code from having to deal with some bad special cases, and
6109 // reduces use counts of instructions.
6110 for (BasicBlock &BB : F) {
6111 if (LiveBlocks.count(&BB))
6112 continue;
6113
6114 unsigned NumDeadInstInBB;
6115 NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(&BB);
6116
6117 MadeIRChange |= NumDeadInstInBB != 0;
6118 NumDeadInst += NumDeadInstInBB;
6119 }
6120
6121 // Once we've found all of the instructions to add to instcombine's worklist,
6122 // add them in reverse order. This way instcombine will visit from the top
6123 // of the function down. This jives well with the way that it adds all uses
6124 // of instructions to the worklist after doing a transformation, thus avoiding
6125 // some N^2 behavior in pathological cases.
6126 Worklist.reserve(InstrsForInstructionWorklist.size());
6127 for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
6128 // DCE instruction if trivially dead. As we iterate in reverse program
6129 // order here, we will clean up whole chains of dead instructions.
6130 if (isInstructionTriviallyDead(Inst, &TLI) ||
6131 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
6132 ++NumDeadInst;
6133 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
6134 salvageDebugInfo(*Inst);
6135 Inst->eraseFromParent();
6136 MadeIRChange = true;
6137 continue;
6138 }
6139
6140 Worklist.push(Inst);
6141 }
6142
6143 return MadeIRChange;
6144}
6145
6147 // Collect backedges.
6148 SmallVector<bool> Visited(F.getMaxBlockNumber());
6149 for (BasicBlock *BB : RPOT) {
6150 Visited[BB->getNumber()] = true;
6151 for (BasicBlock *Succ : successors(BB))
6152 if (Visited[Succ->getNumber()])
6153 BackEdges.insert({BB, Succ});
6154 }
6155 ComputedBackEdges = true;
6156}
6157
// combineInstructionsOverFunction (earlier signature lines, original
// 6158-6162, elided in this view — the function also takes F, the worklist,
// and the analyses AA/AC/TLI/TTI/DT/ORE/BFI/BPI/PSI): drive InstCombine to a
// fixpoint, iterating prepareWorklist + run up to Opts.MaxIterations.
// Returns true if the IR changed.
6163 const InstCombineOptions &Opts) {
6164 auto &DL = F.getDataLayout();
// The function attribute lets individual functions opt out of fixpoint
// verification even when the pass option requests it.
6165 bool VerifyFixpoint = Opts.VerifyFixpoint &&
6166 !F.hasFnAttribute("instcombine-no-verify-fixpoint");
6167
6168 /// Builder - This is an IRBuilder that automatically inserts new
6169 /// instructions into the worklist when they are created.
// (original line 6170 elided — presumably the Builder declaration this
// initializer list belongs to; TODO confirm against upstream.)
6171 F.getContext(), TargetFolder(DL),
6172 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
6173 Worklist.add(I);
// Newly created assumes must be registered so AssumptionCache stays in sync.
6174 if (auto *Assume = dyn_cast<AssumeInst>(I))
6175 AC.registerAssumption(Assume);
6176 }));
6177
// (original line 6178 elided — presumably the RPOT traversal object passed to
// InstCombinerImpl below; TODO confirm against upstream.)
6179
6180 // Lower dbg.declare intrinsics otherwise their value may be clobbered
6181 // by instcombiner.
6182 bool MadeIRChange = false;
// (original line 6183 elided — presumably a flag guarding LowerDbgDeclare;
// TODO confirm against upstream.)
6184 MadeIRChange = LowerDbgDeclare(F);
6185
6186 // Iterate while there is work to do.
6187 unsigned Iteration = 0;
6188 while (true) {
6189 if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
6190 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
6191 << " on " << F.getName()
6192 << " reached; stopping without verifying fixpoint\n");
6193 break;
6194 }
6195
6196 ++Iteration;
6197 ++NumWorklistIterations;
6198 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
6199 << F.getName() << "\n");
6200
6201 InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
6202 BPI, PSI, DL, RPOT);
// (original line 6203 elided; TODO confirm against upstream.)
6204 bool MadeChangeInThisIteration = IC.prepareWorklist(F);
6205 MadeChangeInThisIteration |= IC.run();
6206 if (!MadeChangeInThisIteration)
6207 break;
6208
6209 MadeIRChange = true;
// Still changing past the limit while fixpoint verification is on: that is a
// hard error (the elided line 6212's callee, presumably report_fatal_error).
6210 if (Iteration > Opts.MaxIterations) {
6212 "Instruction Combining on " + Twine(F.getName()) +
6213 " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
6214 " iterations. " +
6215 "Use 'instcombine<no-verify-fixpoint>' or function attribute "
6216 "'instcombine-no-verify-fixpoint' to suppress this error.");
6217 }
6218 }
6219
// Bookkeeping statistics on how many iterations were needed.
6220 if (Iteration == 1)
6221 ++NumOneIteration;
6222 else if (Iteration == 2)
6223 ++NumTwoIterations;
6224 else if (Iteration == 3)
6225 ++NumThreeIterations;
6226 else
6227 ++NumFourOrMoreIterations;
6228
6229 return MadeIRChange;
6230}
6231
6233
// InstCombinePass::printPipeline (first signature line, original 6234, is
// elided in this view): print this pass's textual pipeline representation,
// including its max-iterations and verify-fixpoint options.
6235 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
// Delegate the pass-name portion to the mixin, then append our options.
6236 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6237 OS, MapClassName2PassName);
6238 OS << '<';
6239 OS << "max-iterations=" << Options.MaxIterations << ";";
6240 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6241 OS << '>';
6242}
6243
// Unique address used as this pass's identity token; its address is the key
// passed to LastRunTrackingAnalysis (LRT.shouldSkip / LRT.update) below.
6244char InstCombinePass::ID = 0;
6245
// InstCombinePass::run (signature, original lines 6246-6247, elided in this
// view): new-pass-manager entry point. Skips the function when nothing
// changed since the last InstCombine run, gathers analyses, and invokes
// combineInstructionsOverFunction.
6248 auto &LRT = AM.getResult<LastRunTrackingAnalysis>(F);
6249 // No changes since last InstCombine pass, exit early.
6250 if (LRT.shouldSkip(&ID))
6251 return PreservedAnalyses::all();
6252
6253 auto &AC = AM.getResult<AssumptionAnalysis>(F);
6254 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
6255 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
// (original line 6256 elided — presumably fetches ORE; TODO confirm against
// upstream.)
6257 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
6258
6259 auto *AA = &AM.getResult<AAManager>(F);
6260 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
// PSI is module-level; only a cached result is available from a function pass.
6261 ProfileSummaryInfo *PSI =
6262 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
6263 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
6264 &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
// (original line 6265 elided — presumably fetches a cached BPI; TODO confirm
// against upstream.)
6266
6267 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6268 BFI, BPI, PSI, Options)) {
6269 // No changes, all analyses are preserved.
6270 LRT.update(&ID, /*Changed=*/false);
6271 return PreservedAnalyses::all();
6272 }
6273
6274 // Mark all the analyses that instcombine updates as preserved.
// (original lines 6275 and 6277-6278 elided — presumably declaring PA and
// preserving the CFG analyses plus LastRunTrackingAnalysis; TODO confirm
// against upstream.)
6276 LRT.update(&ID, /*Changed=*/true);
6279 return PA;
6280}
6281
6297
// InstructionCombiningPass::runOnFunction (signature, original line 6298,
// elided in this view): legacy-pass-manager entry point. Gathers the same
// analyses as the new-PM path and invokes combineInstructionsOverFunction
// with default InstCombineOptions.
6299 if (skipFunction(F))
6300 return false;
6301
6302 // Required analyses.
6303 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6304 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6305 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
// (original line 6306 elided — presumably fetches TTI; TODO confirm against
// upstream.)
6307 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// (original lines 6308-6309 partially elided — presumably fetch ORE; TODO
// confirm against upstream.)
6309
6310 // Optional analyses.
// BFI is only computed when profile data is present; PSI/BFI/BPI may all be
// null and the combiner must tolerate that.
6311 ProfileSummaryInfo *PSI =
6313 BlockFrequencyInfo *BFI =
6314 (PSI && PSI->hasProfileSummary()) ?
6316 nullptr;
6317 BranchProbabilityInfo *BPI = nullptr;
6318 if (auto *WrapperPass =
6320 BPI = &WrapperPass->getBPI();
6321
6322 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6323 BFI, BPI, PSI, InstCombineOptions());
6324}
6325
6327
6329
6331 "Combine redundant instructions", false, false)
6342 "Combine redundant instructions", false, false)
6343
6344// Initialization Routines.
6348
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI)
DXIL Resource Access
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
iv Induction Variable Users
Definition IVUsers.cpp:48
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
This file provides internal interfaces used to implement the InstCombine.
This file provides the primary interface to the instcombine pass.
static Value * simplifySwitchOnSelectUsingRanges(SwitchInst &SI, SelectInst *Select, bool IsTrueArm)
static bool isUsedWithinShuffleVector(Value *V)
static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, Instruction *AI)
static Constant * constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector, Constant *Splat, bool SplatLHS, const DataLayout &DL)
static bool shorter_filter(const Value *LHS, const Value *RHS)
static Instruction * combineConstantOffsets(GetElementPtrInst &GEP, InstCombinerImpl &IC)
Combine constant offsets separated by variable offsets.
static Instruction * foldSelectGEP(GetElementPtrInst &GEP, InstCombiner::BuilderTy &Builder)
Thread a GEP operation with constant indices through the constant true/false arms of a select.
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src)
static cl::opt< unsigned > MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine"))
static Instruction * foldSpliceBinOp(BinaryOperator &Inst, InstCombiner::BuilderTy &Builder)
static cl::opt< unsigned > ShouldLowerDbgDeclare("instcombine-lower-dbg-declare", cl::Hidden, cl::init(true))
static bool hasNoSignedWrap(BinaryOperator &I)
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, InstCombinerImpl &IC)
Combine constant operands of associative operations either before or after a cast to eliminate one of...
static bool combineInstructionsOverFunction(Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, const InstCombineOptions &Opts)
static Value * simplifyInstructionWithPHI(Instruction &I, PHINode *PN, Value *InValue, BasicBlock *InBB, const DataLayout &DL, const SimplifyQuery SQ)
static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP)
Return true if we should canonicalize the gep to an i8 ptradd.
static void ClearSubclassDataAfterReassociation(BinaryOperator &I)
Conservatively clears subclassOptionalData after a reassociation or commutation.
static Value * getIdentityValue(Instruction::BinaryOps Opcode, Value *V)
This function returns identity value for given opcode, which can be used to factor patterns like (X *...
static Value * foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall, SelectInst *SelectInst, InstCombiner::BuilderTy &Builder)
static std::optional< std::pair< Value *, Value * > > matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS)
static Value * foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, Value *NewOp, InstCombiner &IC)
static Instruction * canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, GEPOperator *Src, InstCombinerImpl &IC)
static Instruction * tryToMoveFreeBeforeNullTest(CallInst &FI, const DataLayout &DL)
Move the call to free before a NULL test.
static Value * simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, bool IsTrueArm)
static Value * tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, InstCombiner::BuilderTy &Builder, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D)
This tries to simplify binary operations by factorizing out common terms (e.
static bool isRemovableWrite(CallBase &CB, Value *UsedV, const TargetLibraryInfo &TLI)
Given a call CB which uses an address UsedV, return true if we can prove the call's only possible eff...
static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS, BinaryOperator *OtherOp)
This function predicates factorization using distributive laws.
static bool hasNoUnsignedWrap(BinaryOperator &I)
static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI)
Check for case where the call writes to an otherwise dead alloca.
static cl::opt< unsigned > MaxSinkNumUsers("instcombine-max-sink-users", cl::init(32), cl::desc("Maximum number of undroppable users for instruction sinking"))
static Instruction * foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN, IRBuilderBase &Builder)
static std::optional< ModRefInfo > isAllocSiteRemovable(Instruction *AI, SmallVectorImpl< WeakTrackingVH > &Users, const TargetLibraryInfo &TLI, bool KnowInit)
static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo)
Return 'true' if the given typeinfo will match anything.
static cl::opt< bool > EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), cl::init(true))
static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C)
static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1, GEPOperator &GEP2)
Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y)) transform.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static bool IsSelect(MachineInstr &MI)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > & Cond
static unsigned getNumElements(Type *Ty)
unsigned OpIndex
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
bool isNoAliasScopeDeclDead(Instruction *Inst)
void analyse(Instruction *I)
The Input class is used to parse a yaml document into in-memory structs and vectors.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:214
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1769
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Definition APInt.cpp:1901
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1939
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1971
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1952
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:219
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
uint64_t getNumElements() const
Type * getElementType() const
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI uint64_t getDereferenceableBytes() const
Returns the number of dereferenceable bytes from the dereferenceable attribute.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
Legacy wrapper pass to provide the BasicAAResult object.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:518
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI bool isEntryBlock() const
Return true if this is the entry block of the containing function.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:472
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI const_iterator getFirstNonPHIOrDbgOrAlloca() const
Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...
size_t size() const
Definition BasicBlock.h:470
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:294
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Analysis pass which computes BranchProbabilityInfo.
Analysis providing branch probability information.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
void setAttributes(AttributeList A)
Set the attributes for this call.
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
ConstantArray - Constant Array Declarations.
Definition Constants.h:576
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:932
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNot(Constant *C)
static LLVM_ABI Constant * getAdd(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false, bool NSZ=false)
Return the identity constant for a binary opcode.
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI ConstantRange makeExactNoWrapRegion(Instruction::BinaryOps BinOp, const APInt &Other, unsigned NoWrapKind)
Produce the range that contains X if and only if "X BinOp Other" does not wrap.
Constant Vector Declarations.
Definition Constants.h:660
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * replaceUndefsWith(Constant *C, Constant *Replacement)
Try to replace undefined constant C or undefined elements in C with Replacement.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
const Constant * stripPointerCasts() const
Definition Constant.h:219
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
static LLVM_ABI DIExpression * appendOpsToArg(const DIExpression *Expr, ArrayRef< uint64_t > Ops, unsigned ArgNo, bool StackValue=false)
Create a copy of Expr by appending the given list of Ops to each instance of the operand DW_OP_LLVM_a...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(CounterInfo &Counter)
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool empty() const
Definition DenseMap.h:109
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
iterator_range< idx_iterator > indices() const
idx_iterator idx_end() const
static ExtractValueInst * Create(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
idx_iterator idx_begin() const
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
FunctionPass(char &pid)
Definition Pass.h:316
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition Pass.cpp:188
const BasicBlock & getEntryBlock() const
Definition Function.h:809
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags all()
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags intersectForReassociate(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep (gep, p, y), x).
bool hasNoUnsignedWrap() const
bool isInBounds() const
GEPNoWrapFlags intersectForOffsetAdd(GEPNoWrapFlags Other) const
Given (gep (gep p, x), y), determine the nowrap flags for (gep p, x+y).
static GEPNoWrapFlags none()
GEPNoWrapFlags getNoWrapFlags() const
Definition Operator.h:425
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
This instruction compares its operands according to the predicate given to the constructor.
CmpPredicate getCmpPredicate() const
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2048
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition IRBuilder.h:537
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
This instruction inserts a struct field of array element value into an aggregate value.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI InstCombinePass(InstCombineOptions Opts={})
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Instruction * foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I)
Tries to simplify binops of select and cast of the select condition.
Instruction * visitCondBrInst(CondBrInst &BI)
Instruction * foldBinOpIntoSelectOrPhi(BinaryOperator &I)
This is a convenience wrapper function for the above two functions.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I)
Performs a few simplifications for operators which are associative or commutative.
Instruction * visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src)
Value * foldUsingDistributiveLaws(BinaryOperator &I)
Tries to simplify binary operations which some other binary operation distributes over.
Instruction * foldBinOpShiftWithShift(BinaryOperator &I)
Instruction * visitUnreachableInst(UnreachableInst &I)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
void handleUnreachableFrom(Instruction *I, SmallVectorImpl< BasicBlock * > &Worklist)
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
Instruction * visitFreeze(FreezeInst &I)
Instruction * foldBinOpSelectBinOp(BinaryOperator &Op)
In some cases it is beneficial to fold a select into a binary operator.
void handlePotentiallyDeadBlocks(SmallVectorImpl< BasicBlock * > &Worklist)
bool prepareWorklist(Function &F)
Perform early cleanup and prepare the InstCombine worklist.
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitExtractValueInst(ExtractValueInst &EV)
void handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc)
Instruction * foldBinopWithRecurrence(BinaryOperator &BO)
Try to fold binary operators whose operands are simple interleaved recurrences to a single recurrence...
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitLandingPadInst(LandingPadInst &LI)
Instruction * visitReturnInst(ReturnInst &RI)
Instruction * visitSwitchInst(SwitchInst &SI)
Instruction * foldBinopWithPhiOperands(BinaryOperator &BO)
For a binary operator with 2 phi operands, try to hoist the binary operation before the phi.
bool SimplifyDemandedFPClass(Instruction *I, unsigned Op, FPClassTest DemandedMask, KnownFPClass &Known, const SimplifyQuery &Q, unsigned Depth=0)
bool mergeStoreIntoSuccessor(StoreInst &SI)
Try to transform: if () { *P = v1; } else { *P = v2 } or: *P = v1; if () { *P = v2; }...
Instruction * tryFoldInstWithCtpopWithNot(Instruction *I)
Instruction * visitUncondBrInst(UncondBrInst &BI)
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Value * pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI)
bool run()
Run the combiner over the entire worklist until it is empty.
Instruction * foldVectorBinop(BinaryOperator &Inst)
Canonicalize the position of binops relative to shufflevector.
bool removeInstructionsBeforeUnreachable(Instruction &I)
Value * SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS, Value *RHS)
void tryToSinkInstructionDbgVariableRecords(Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, BasicBlock *DestBlock, SmallVectorImpl< DbgVariableRecord * > &DPUsers)
void addDeadEdge(BasicBlock *From, BasicBlock *To, SmallVectorImpl< BasicBlock * > &Worklist)
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * visitGetElementPtrInst(GetElementPtrInst &GEP)
Value * tryFactorizationFolds(BinaryOperator &I)
This tries to simplify binary operations by factorizing out common terms (e.
Instruction * foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN)
bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock)
Try to move the specified instruction from its current block into the beginning of DestBlock,...
bool freezeOtherUses(FreezeInst &FI)
void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser=nullptr)
Freely adapt every user of V as-if V was changed to !V.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
static unsigned getComplexity(Value *V)
Assign a complexity or rank value to LLVM Values.
TargetLibraryInfo & TLI
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
uint64_t MaxArraySizeForCombine
Maximum size of array considered when transforming.
static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI)
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
static bool isCanonicalPredicate(CmpPredicate Pred)
Predicate canonicalization reduces the number of patterns that need to be matched by other transforms...
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
BranchProbabilityInfo * BPI
ReversePostOrderTraversal< BasicBlock * > & RPOT
const DataLayout & DL
DomConditionCache DC
const bool MinimizeSize
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
void addToWorklist(Instruction *I)
Value * getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume, unsigned Depth)
Return nonnull value if V is free to invert under the condition of WillInvertAllUses.
SmallDenseSet< std::pair< const BasicBlock *, const BasicBlock * >, 8 > BackEdges
Backedges, used to avoid pushing instructions across backedges in cases where this may result in infi...
std::optional< Value * > targetSimplifyDemandedVectorEltsIntrinsic(IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
DominatorTree & DT
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
SmallDenseSet< std::pair< BasicBlock *, BasicBlock * >, 8 > DeadEdges
Edges that are known to never be taken.
std::optional< Value * > targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
BuilderTy & Builder
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
bool isBackEdge(const BasicBlock *From, const BasicBlock *To)
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
void visit(Iterator Start, Iterator End)
Definition InstVisitor.h:87
The legacy pass manager's instcombine pass.
Definition InstCombine.h:68
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
void add(Instruction *I)
Add instruction to the worklist.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isAssociative() const LLVM_READONLY
Return true if the instruction is associative:
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
bool isTerminator() const
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI bool willReturn() const LLVM_READONLY
Return true if the instruction will return (unwinding is considered as a form of returning control fl...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isBitwiseLogicOp() const
Return true if this is and/or/xor.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
bool isIntDivRem() const
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
A wrapper class for inspecting calls to intrinsic functions.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
The landingpad instruction holds all of the information necessary to generate correct exception handl...
bool isCleanup() const
Return 'true' if this landingpad instruction is a cleanup.
unsigned getNumClauses() const
Get the number of clauses for this landing pad.
static LLVM_ABI LandingPadInst * Create(Type *RetTy, unsigned NumReservedClauses, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedClauses is a hint for the number of incoming clauses that this landingpad w...
LLVM_ABI void addClause(Constant *ClauseVal)
Add a catch or filter clause to the landing pad.
bool isCatch(unsigned Idx) const
Return 'true' if the clause and index Idx is a catch clause.
bool isFilter(unsigned Idx) const
Return 'true' if the clause and index Idx is a filter clause.
Constant * getClause(unsigned Idx) const
Get the value of the clause at index Idx.
void setCleanup(bool V)
Indicate that this landingpad instruction is a cleanup.
A function/module analysis which provides an empty LastRunTrackingInfo.
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
This is the common base class for memset/memcpy/memmove.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Root of the metadata hierarchy.
Definition Metadata.h:64
Value * getLHS() const
Value * getRHS() const
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MDNode * getScopeList() const
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
AnalysisType * getAnalysisIfAvailable() const
getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to get analysis information tha...
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition Constants.h:1654
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition Registry.h:53
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
const Value * getFalseValue() const
const Value * getCondition() const
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
const Value * getTrueValue() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Multiway switch.
TargetFolder - Create constants with target dependent folding.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:65
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:278
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:310
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110
Unconditional Branch instruction.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Use * op_iterator
Definition User.h:254
op_range operands()
Definition User.h:267
op_iterator op_begin()
Definition User.h:259
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:119
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
op_iterator op_end()
Definition User.h:261
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition Value.h:761
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition Value.cpp:166
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
bool hasUseList() const
Check if this Value has a use-list.
Definition Value.h:345
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:150
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:347
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:893
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Value handle that is nullable, but tries to track the Value.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
An efficient, type-erasing, non-owning reference to a callable.
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
reverse_self_iterator getReverseIterator()
Definition ilist_node.h:126
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
CmpClass_match< LHS, RHS, FCmpInst > m_FCmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
OneOps_match< OpTy, Instruction::Freeze > m_Freeze(const OpTy &Op)
Matches FreezeInst.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
br_match m_UnconditionalBr(BasicBlock *&Succ)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_idiv_op > m_IDiv(const LHS &L, const RHS &R)
Matches integer division operations.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
constantexpr_match m_ConstantExpr()
Match a constant expression or a constant that contains a constant expression.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
match_combine_or< CastInst_match< OpTy, UIToFPInst >, CastInst_match< OpTy, SIToFPInst > > m_IToFP(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
NNegZExt_match< OpTy > m_NNegZExt(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
Splat_match< T > m_ConstantSplat(const T &SubPattern)
Match a constant splat. TODO: Extend this to non-constant splats.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
SelectLike_match< CondTy, LTy, RTy > m_SelectLike(const CondTy &C, const LTy &TrueC, const RTy &FalseC)
Matches a value that behaves like a boolean-controlled select, i.e.
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
BinaryOp_match< LHS, RHS, Instruction::SDiv > m_SDiv(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
cstfp_pred_ty< is_non_zero_fp > m_NonZeroFP()
Match a floating-point non-zero.
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_VectorInsert(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2116
LLVM_ABI void initializeInstructionCombiningPassPass(PassRegistry &)
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition Metadata.cpp:64
LLVM_ABI unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB)
Remove all instructions from a basic block other than its terminator and any present EH pad instructi...
Definition Local.cpp:2500
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI Value * simplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef< Value * > Indices, GEPNoWrapFlags NW, const SimplifyQuery &Q)
Given operands for a GetElementPtrInst, fold the result or return null.
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
bool succ_empty(const Instruction *I)
Definition CFG.h:153
LLVM_ABI Value * simplifyFreezeInst(Value *Op, const SimplifyQuery &Q)
Given an operand for a Freeze, see if we can fold the result.
LLVM_ABI FunctionPass * createInstructionCombiningPass()
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1725
auto successors(const MachineBasicBlock *BB)
LLVM_ABI Constant * ConstantFoldInstruction(const Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstruction - Try to constant fold the specified instruction.
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI std::optional< StringRef > getAllocationFamily(const Value *I, const TargetLibraryInfo *TLI)
If a function is part of an allocation family (e.g.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI Value * simplifyInstructionWithOperands(Instruction *I, ArrayRef< Value * > NewOps, const SimplifyQuery &Q)
Like simplifyInstruction but the operands of I are replaced with NewOps.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
gep_type_iterator gep_type_end(const User *GEP)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI Value * getReallocatedOperand(const CallBase *CB)
If this is a call to a realloc function, return the reallocated operand.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition APFloat.h:1622
LLVM_ABI bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
LLVM_ABI bool handleUnreachableTerminator(Instruction *I, SmallVectorImpl< Value * > &PoisonedValues)
If a terminator in an unreachable basic block has an operand of type Instruction, transform it into p...
Definition Local.cpp:2483
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)
Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
LLVM_ABI Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI Value * emitGEPOffset(IRBuilderBase *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition Local.cpp:22
constexpr unsigned MaxAnalysisRecursionDepth
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI bool LowerDbgDeclare(Function &F)
Lowers dbg.declare records into appropriate set of dbg.value records.
Definition Local.cpp:1810
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI void ConvertDebugDeclareToDebugValue(DbgVariableRecord *DVR, StoreInst *SI, DIBuilder &Builder)
Inserts a dbg.value record before a store to an alloca'd value that has an associated dbg....
Definition Local.cpp:1677
LLVM_ABI void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef< DbgVariableRecord * > DPInsns)
Implementation of salvageDebugInfo, applying only to instructions in Insns, rather than all debug use...
Definition Local.cpp:2052
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI Value * simplifyExtractValueInst(Value *Agg, ArrayRef< unsigned > Idxs, const SimplifyQuery &Q)
Given operands for an ExtractValueInst, fold the result or return null.
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2429
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ ModRef
The access may reference and may modify the value stored in memory.
Definition ModRef.h:36
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
@ NoModRef
The access neither references nor modifies the value stored in memory.
Definition ModRef.h:30
TargetTransformInfo TTI
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Like isSafeToSpeculativelyExecute, but without using information from the instruction's non-constant operands.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2146
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void initializeInstCombine(PassRegistry &)
Initialize all passes linked into the InstCombine library.
LLVM_ABI void findDbgUsers(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the debug info records describing a value.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
LLVM_ABI std::optional< bool > isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, bool LHSIsTrue=true, unsigned Depth=0)
Return true if RHS is known to be implied true by LHS.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:267
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:264
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
SimplifyQuery getWithInstruction(const Instruction *I) const