LLVM 19.0.0git
X86InstCombineIntrinsic.cpp
1//===-- X86InstCombineIntrinsic.cpp - X86 specific InstCombine pass -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements a TargetTransformInfo analysis pass specific to the
10/// X86 target machine. It uses the target's detailed information to provide
11/// more precise answers to certain TTI queries, while letting the target
12/// independent and default TTI implementations handle the rest.
13///
14//===----------------------------------------------------------------------===//
15
16#include "X86TargetTransformInfo.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/IntrinsicsX86.h"
19#include "llvm/Support/KnownBits.h"
20#include "llvm/Transforms/InstCombine/InstCombiner.h"
21#include <optional>
22
23using namespace llvm;
24
25#define DEBUG_TYPE "x86tti"
26
27/// Return a constant boolean vector that has true elements in all positions
28/// where the input constant data vector has an element with the sign bit set.
29static Constant *getNegativeIsTrueBoolVec(Constant *V, const DataLayout &DL) {
30 VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
31 V = ConstantExpr::getBitCast(V, IntTy);
32 V = ConstantFoldCompareInstOperands(CmpInst::ICMP_SGT,
33 Constant::getNullValue(IntTy), V, DL);
34 assert(V && "Vector must be foldable");
35 return V;
36}
37
38/// Convert the x86 XMM integer vector mask to a vector of bools based on
39/// each element's most significant bit (the sign bit).
40static Value *getBoolVecFromMask(Value *Mask, const DataLayout &DL) {
41 // Fold Constant Mask.
42 if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
43 return getNegativeIsTrueBoolVec(ConstantMask, DL);
44
45 // Mask was extended from a boolean vector.
46 Value *ExtMask;
47 if (match(Mask,
48 PatternMatch::m_SExt(PatternMatch::m_Value(ExtMask))) &&
49 ExtMask->getType()->isIntOrIntVectorTy(1))
50 return ExtMask;
51
52 return nullptr;
53}
54
55// TODO: If the x86 backend knew how to convert a bool vector mask back to an
56// XMM register mask efficiently, we could transform all x86 masked intrinsics
57// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
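// Note: the transform below converts the x86 masked load intrinsics (e.g.
// llvm.x86.avx.maskload.*) into the generic llvm.masked.load intrinsic with a
// zero pass-through, when the mask is a constant or a sign-extended bool vector.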
58static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
59 Value *Ptr = II.getOperand(0);
60 Value *Mask = II.getOperand(1);
61 Constant *ZeroVec = Constant::getNullValue(II.getType());
62
63 // Zero Mask - masked load instruction creates a zero vector.
64 if (isa<ConstantAggregateZero>(Mask))
65 return IC.replaceInstUsesWith(II, ZeroVec);
66
67 // The mask is constant or extended from a bool vector. Convert this x86
68 // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
69 if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
70 // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
71 // the LLVM intrinsic definition for the pointer argument.
72 unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
73 PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
74 Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
75
76 // The pass-through vector for an x86 masked load is a zero vector.
77 CallInst *NewMaskedLoad = IC.Builder.CreateMaskedLoad(
78 II.getType(), PtrCast, Align(1), BoolMask, ZeroVec);
79 return IC.replaceInstUsesWith(II, NewMaskedLoad);
80 }
81
82 return nullptr;
83}
84
85// TODO: If the x86 backend knew how to convert a bool vector mask back to an
86// XMM register mask efficiently, we could transform all x86 masked intrinsics
87// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
88static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
89 Value *Ptr = II.getOperand(0);
90 Value *Mask = II.getOperand(1);
91 Value *Vec = II.getOperand(2);
92
93 // Zero Mask - this masked store instruction does nothing.
94 if (isa<ConstantAggregateZero>(Mask)) {
95 IC.eraseInstFromFunction(II);
96 return true;
97 }
98
99 // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
100 // anything else at this level.
101 if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
102 return false;
103
104 // The mask is constant or extended from a bool vector. Convert this x86
105 // intrinsic to the LLVM intrinsic to allow target-independent optimizations.
106 if (Value *BoolMask = getBoolVecFromMask(Mask, IC.getDataLayout())) {
107 unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
108 PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
109 Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
110
111 IC.Builder.CreateMaskedStore(Vec, PtrCast, Align(1), BoolMask);
112
113 // 'Replace uses' doesn't work for stores. Erase the original masked store.
114 IC.eraseInstFromFunction(II);
115 return true;
116 }
117
118 return false;
119}
120
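// Attempt to simplify x86 shift-by-immediate/shift-by-scalar intrinsics
// (PSLL/PSRL/PSRA) to generic IR shifts when the shift amount is known to be
// in range; out-of-range amounts fold to zero (logical) or a splat of the
// sign bit (arithmetic).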
121static Value *simplifyX86immShift(const IntrinsicInst &II,
122 InstCombiner::BuilderTy &Builder) {
123 bool LogicalShift = false;
124 bool ShiftLeft = false;
125 bool IsImm = false;
126
127 switch (II.getIntrinsicID()) {
128 default:
129 llvm_unreachable("Unexpected intrinsic!");
130 case Intrinsic::x86_sse2_psrai_d:
131 case Intrinsic::x86_sse2_psrai_w:
132 case Intrinsic::x86_avx2_psrai_d:
133 case Intrinsic::x86_avx2_psrai_w:
134 case Intrinsic::x86_avx512_psrai_q_128:
135 case Intrinsic::x86_avx512_psrai_q_256:
136 case Intrinsic::x86_avx512_psrai_d_512:
137 case Intrinsic::x86_avx512_psrai_q_512:
138 case Intrinsic::x86_avx512_psrai_w_512:
139 IsImm = true;
140 [[fallthrough]];
141 case Intrinsic::x86_sse2_psra_d:
142 case Intrinsic::x86_sse2_psra_w:
143 case Intrinsic::x86_avx2_psra_d:
144 case Intrinsic::x86_avx2_psra_w:
145 case Intrinsic::x86_avx512_psra_q_128:
146 case Intrinsic::x86_avx512_psra_q_256:
147 case Intrinsic::x86_avx512_psra_d_512:
148 case Intrinsic::x86_avx512_psra_q_512:
149 case Intrinsic::x86_avx512_psra_w_512:
150 LogicalShift = false;
151 ShiftLeft = false;
152 break;
153 case Intrinsic::x86_sse2_psrli_d:
154 case Intrinsic::x86_sse2_psrli_q:
155 case Intrinsic::x86_sse2_psrli_w:
156 case Intrinsic::x86_avx2_psrli_d:
157 case Intrinsic::x86_avx2_psrli_q:
158 case Intrinsic::x86_avx2_psrli_w:
159 case Intrinsic::x86_avx512_psrli_d_512:
160 case Intrinsic::x86_avx512_psrli_q_512:
161 case Intrinsic::x86_avx512_psrli_w_512:
162 IsImm = true;
163 [[fallthrough]];
164 case Intrinsic::x86_sse2_psrl_d:
165 case Intrinsic::x86_sse2_psrl_q:
166 case Intrinsic::x86_sse2_psrl_w:
167 case Intrinsic::x86_avx2_psrl_d:
168 case Intrinsic::x86_avx2_psrl_q:
169 case Intrinsic::x86_avx2_psrl_w:
170 case Intrinsic::x86_avx512_psrl_d_512:
171 case Intrinsic::x86_avx512_psrl_q_512:
172 case Intrinsic::x86_avx512_psrl_w_512:
173 LogicalShift = true;
174 ShiftLeft = false;
175 break;
176 case Intrinsic::x86_sse2_pslli_d:
177 case Intrinsic::x86_sse2_pslli_q:
178 case Intrinsic::x86_sse2_pslli_w:
179 case Intrinsic::x86_avx2_pslli_d:
180 case Intrinsic::x86_avx2_pslli_q:
181 case Intrinsic::x86_avx2_pslli_w:
182 case Intrinsic::x86_avx512_pslli_d_512:
183 case Intrinsic::x86_avx512_pslli_q_512:
184 case Intrinsic::x86_avx512_pslli_w_512:
185 IsImm = true;
186 [[fallthrough]];
187 case Intrinsic::x86_sse2_psll_d:
188 case Intrinsic::x86_sse2_psll_q:
189 case Intrinsic::x86_sse2_psll_w:
190 case Intrinsic::x86_avx2_psll_d:
191 case Intrinsic::x86_avx2_psll_q:
192 case Intrinsic::x86_avx2_psll_w:
193 case Intrinsic::x86_avx512_psll_d_512:
194 case Intrinsic::x86_avx512_psll_q_512:
195 case Intrinsic::x86_avx512_psll_w_512:
196 LogicalShift = true;
197 ShiftLeft = true;
198 break;
199 }
200 assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
201
202 Value *Vec = II.getArgOperand(0);
203 Value *Amt = II.getArgOperand(1);
204 auto *VT = cast<FixedVectorType>(Vec->getType());
205 Type *SVT = VT->getElementType();
206 Type *AmtVT = Amt->getType();
207 unsigned VWidth = VT->getNumElements();
208 unsigned BitWidth = SVT->getPrimitiveSizeInBits();
209
210 // If the shift amount is guaranteed to be in-range we can replace it with a
211 // generic shift. If it's guaranteed to be out of range, logical shifts combine
212 // to zero and arithmetic shifts are clamped to (BitWidth - 1).
213 if (IsImm) {
214 assert(AmtVT->isIntegerTy(32) && "Unexpected shift-by-immediate type");
215 KnownBits KnownAmtBits =
216 llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
217 if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
218 Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
219 Amt = Builder.CreateVectorSplat(VWidth, Amt);
220 return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
221 : Builder.CreateLShr(Vec, Amt))
222 : Builder.CreateAShr(Vec, Amt));
223 }
224 if (KnownAmtBits.getMinValue().uge(BitWidth)) {
225 if (LogicalShift)
226 return ConstantAggregateZero::get(VT);
227 Amt = ConstantInt::get(SVT, BitWidth - 1);
228 return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
229 }
230 } else {
231 // Ensure the first element has an in-range value and the rest of the
232 // elements in the bottom 64 bits are zero.
233 assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
234 cast<VectorType>(AmtVT)->getElementType() == SVT &&
235 "Unexpected shift-by-scalar type");
236 unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
237 APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
238 APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
239 KnownBits KnownLowerBits = llvm::computeKnownBits(
240 Amt, DemandedLower, II.getModule()->getDataLayout());
241 KnownBits KnownUpperBits = llvm::computeKnownBits(
242 Amt, DemandedUpper, II.getModule()->getDataLayout());
243 if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
244 (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
245 SmallVector<int, 16> ZeroSplat(VWidth, 0);
246 Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
247 return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
248 : Builder.CreateLShr(Vec, Amt))
249 : Builder.CreateAShr(Vec, Amt));
250 }
251 }
252
253 // Simplify if count is constant vector.
254 auto *CDV = dyn_cast<ConstantDataVector>(Amt);
255 if (!CDV)
256 return nullptr;
257
258 // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
259 // operand to compute the shift amount.
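// e.g. for a v8i16 shift (BitWidth == 16) the four low i16 elements of the
// amount operand are concatenated into the single 64-bit shift count below.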
260 assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
261 cast<VectorType>(AmtVT)->getElementType() == SVT &&
262 "Unexpected shift-by-scalar type");
263
264 // Concatenate the sub-elements to create the 64-bit value.
265 APInt Count(64, 0);
266 for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
267 unsigned SubEltIdx = (NumSubElts - 1) - i;
268 auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
269 Count <<= BitWidth;
270 Count |= SubElt->getValue().zextOrTrunc(64);
271 }
272
273 // If shift-by-zero then just return the original value.
274 if (Count.isZero())
275 return Vec;
276
277 // Handle cases when Shift >= BitWidth.
278 if (Count.uge(BitWidth)) {
279 // If LogicalShift - just return zero.
280 if (LogicalShift)
281 return ConstantAggregateZero::get(VT);
282
283 // If ArithmeticShift - clamp Shift to (BitWidth - 1).
284 Count = APInt(64, BitWidth - 1);
285 }
286
287 // Get a constant vector of the same type as the first operand.
288 auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
289 auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
290
291 if (ShiftLeft)
292 return Builder.CreateShl(Vec, ShiftVec);
293
294 if (LogicalShift)
295 return Builder.CreateLShr(Vec, ShiftVec);
296
297 return Builder.CreateAShr(Vec, ShiftVec);
298}
299
300// Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
301// Unlike the generic IR shifts, the intrinsics have defined behaviour for out
302// of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
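// e.g. a constant amount vector of <1, 2, 3, 4> becomes a plain shl/lshr/ashr
// by that vector, while a lane amount >= the element bit width folds to zero
// for logical shifts or to a sign splat for arithmetic shifts.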
303static Value *simplifyX86varShift(const IntrinsicInst &II,
304 InstCombiner::BuilderTy &Builder) {
305 bool LogicalShift = false;
306 bool ShiftLeft = false;
307
308 switch (II.getIntrinsicID()) {
309 default:
310 llvm_unreachable("Unexpected intrinsic!");
311 case Intrinsic::x86_avx2_psrav_d:
312 case Intrinsic::x86_avx2_psrav_d_256:
313 case Intrinsic::x86_avx512_psrav_q_128:
314 case Intrinsic::x86_avx512_psrav_q_256:
315 case Intrinsic::x86_avx512_psrav_d_512:
316 case Intrinsic::x86_avx512_psrav_q_512:
317 case Intrinsic::x86_avx512_psrav_w_128:
318 case Intrinsic::x86_avx512_psrav_w_256:
319 case Intrinsic::x86_avx512_psrav_w_512:
320 LogicalShift = false;
321 ShiftLeft = false;
322 break;
323 case Intrinsic::x86_avx2_psrlv_d:
324 case Intrinsic::x86_avx2_psrlv_d_256:
325 case Intrinsic::x86_avx2_psrlv_q:
326 case Intrinsic::x86_avx2_psrlv_q_256:
327 case Intrinsic::x86_avx512_psrlv_d_512:
328 case Intrinsic::x86_avx512_psrlv_q_512:
329 case Intrinsic::x86_avx512_psrlv_w_128:
330 case Intrinsic::x86_avx512_psrlv_w_256:
331 case Intrinsic::x86_avx512_psrlv_w_512:
332 LogicalShift = true;
333 ShiftLeft = false;
334 break;
335 case Intrinsic::x86_avx2_psllv_d:
336 case Intrinsic::x86_avx2_psllv_d_256:
337 case Intrinsic::x86_avx2_psllv_q:
338 case Intrinsic::x86_avx2_psllv_q_256:
339 case Intrinsic::x86_avx512_psllv_d_512:
340 case Intrinsic::x86_avx512_psllv_q_512:
341 case Intrinsic::x86_avx512_psllv_w_128:
342 case Intrinsic::x86_avx512_psllv_w_256:
343 case Intrinsic::x86_avx512_psllv_w_512:
344 LogicalShift = true;
345 ShiftLeft = true;
346 break;
347 }
348 assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
349
350 Value *Vec = II.getArgOperand(0);
351 Value *Amt = II.getArgOperand(1);
352 auto *VT = cast<FixedVectorType>(II.getType());
353 Type *SVT = VT->getElementType();
354 int NumElts = VT->getNumElements();
355 int BitWidth = SVT->getIntegerBitWidth();
356
357 // If the shift amount is guaranteed to be in-range we can replace it with a
358 // generic shift.
359 KnownBits KnownAmt =
360 llvm::computeKnownBits(Amt, II.getModule()->getDataLayout());
361 if (KnownAmt.getMaxValue().ult(BitWidth)) {
362 return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
363 : Builder.CreateLShr(Vec, Amt))
364 : Builder.CreateAShr(Vec, Amt));
365 }
366
367 // Simplify if all shift amounts are constant/undef.
368 auto *CShift = dyn_cast<Constant>(Amt);
369 if (!CShift)
370 return nullptr;
371
372 // Collect each element's shift amount.
373 // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
374 bool AnyOutOfRange = false;
375 SmallVector<int, 8> ShiftAmts;
376 for (int I = 0; I < NumElts; ++I) {
377 auto *CElt = CShift->getAggregateElement(I);
378 if (isa_and_nonnull<UndefValue>(CElt)) {
379 ShiftAmts.push_back(-1);
380 continue;
381 }
382
383 auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
384 if (!COp)
385 return nullptr;
386
387 // Handle out of range shifts.
388 // If LogicalShift - set to BitWidth (special case).
389 // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
390 APInt ShiftVal = COp->getValue();
391 if (ShiftVal.uge(BitWidth)) {
392 AnyOutOfRange = LogicalShift;
393 ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
394 continue;
395 }
396
397 ShiftAmts.push_back((int)ShiftVal.getZExtValue());
398 }
399
400 // If all elements out of range or UNDEF, return vector of zeros/undefs.
401 // ArithmeticShift should only hit this if they are all UNDEF.
402 auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
403 if (llvm::all_of(ShiftAmts, OutOfRange)) {
404 SmallVector<Constant *, 8> ConstantVec;
405 for (int Idx : ShiftAmts) {
406 if (Idx < 0) {
407 ConstantVec.push_back(UndefValue::get(SVT));
408 } else {
409 assert(LogicalShift && "Logical shift expected");
410 ConstantVec.push_back(ConstantInt::getNullValue(SVT));
411 }
412 }
413 return ConstantVector::get(ConstantVec);
414 }
415
416 // We can't handle only some out of range values with generic logical shifts.
417 if (AnyOutOfRange)
418 return nullptr;
419
420 // Build the shift amount constant vector.
421 SmallVector<Constant *, 8> ShiftVecAmts;
422 for (int Idx : ShiftAmts) {
423 if (Idx < 0)
424 ShiftVecAmts.push_back(UndefValue::get(SVT));
425 else
426 ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
427 }
428 auto ShiftVec = ConstantVector::get(ShiftVecAmts);
429
430 if (ShiftLeft)
431 return Builder.CreateShl(Vec, ShiftVec);
432
433 if (LogicalShift)
434 return Builder.CreateLShr(Vec, ShiftVec);
435
436 return Builder.CreateAShr(Vec, ShiftVec);
437}
438
439static Value *simplifyX86pack(IntrinsicInst &II,
440 InstCombiner::BuilderTy &Builder, bool IsSigned) {
441 Value *Arg0 = II.getArgOperand(0);
442 Value *Arg1 = II.getArgOperand(1);
443 Type *ResTy = II.getType();
444
445 // Fast all undef handling.
446 if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
447 return UndefValue::get(ResTy);
448
449 auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
450 unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
451 unsigned NumSrcElts = ArgTy->getNumElements();
452 assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
453 "Unexpected packing types");
454
455 unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
456 unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
457 unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
458 assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
459 "Unexpected packing types");
460
461 // Constant folding.
462 if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
463 return nullptr;
464
465 // Clamp Values - signed/unsigned both use signed clamp values, but they
466 // differ on the min/max values.
467 APInt MinValue, MaxValue;
468 if (IsSigned) {
469 // PACKSS: Truncate signed value with signed saturation.
470 // Source values less than dst minint are saturated to minint.
471 // Source values greater than dst maxint are saturated to maxint.
472 MinValue =
473 APInt::getSignedMinValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
474 MaxValue =
475 APInt::getSignedMaxValue(DstScalarSizeInBits).sext(SrcScalarSizeInBits);
476 } else {
477 // PACKUS: Truncate signed value with unsigned saturation.
478 // Source values less than zero are saturated to zero.
479 // Source values greater than dst maxuint are saturated to maxuint.
480 MinValue = APInt::getZero(SrcScalarSizeInBits);
481 MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
482 }
483
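// e.g. PACKSSWB clamps each i16 source element to [-128, 127] and PACKUSWB
// clamps to [0, 255] before the truncating shuffle below.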
484 auto *MinC = Constant::getIntegerValue(ArgTy, MinValue);
485 auto *MaxC = Constant::getIntegerValue(ArgTy, MaxValue);
486 Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
487 Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
488 Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
489 Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
490
491 // Shuffle clamped args together at the lane level.
492 SmallVector<int, 32> PackMask;
493 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
494 for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
495 PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
496 for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
497 PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
498 }
499 auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
500
501 // Truncate to dst size.
502 return Builder.CreateTrunc(Shuffle, ResTy);
503}
504
505static Value *simplifyX86movmsk(const IntrinsicInst &II,
506 InstCombiner::BuilderTy &Builder) {
507 Value *Arg = II.getArgOperand(0);
508 Type *ResTy = II.getType();
509
510 // movmsk(undef) -> zero as we must ensure the upper bits are zero.
511 if (isa<UndefValue>(Arg))
512 return Constant::getNullValue(ResTy);
513
514 auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());
515 // We can't easily peek through x86_mmx types.
516 if (!ArgTy)
517 return nullptr;
518
519 // Expand MOVMSK to compare/bitcast/zext:
520 // e.g. PMOVMSKB(v16i8 x):
521 // %cmp = icmp slt <16 x i8> %x, zeroinitializer
522 // %int = bitcast <16 x i1> %cmp to i16
523 // %res = zext i16 %int to i32
524 unsigned NumElts = ArgTy->getNumElements();
525 Type *IntegerTy = Builder.getIntNTy(NumElts);
526
527 Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy));
528 Res = Builder.CreateIsNeg(Res);
529 Res = Builder.CreateBitCast(Res, IntegerTy);
530 Res = Builder.CreateZExtOrTrunc(Res, ResTy);
531 return Res;
532}
533
534static Value *simplifyX86addcarry(const IntrinsicInst &II,
535 InstCombiner::BuilderTy &Builder) {
536 Value *CarryIn = II.getArgOperand(0);
537 Value *Op1 = II.getArgOperand(1);
538 Value *Op2 = II.getArgOperand(2);
539 Type *RetTy = II.getType();
540 Type *OpTy = Op1->getType();
541 assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
542 RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
543 "Unexpected types for x86 addcarry");
544
545 // If carry-in is zero, this is just an unsigned add with overflow.
546 if (match(CarryIn, PatternMatch::m_ZeroInt())) {
547 Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
548 {Op1, Op2});
549 // The types have to be adjusted to match the x86 call types.
550 Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);
551 Value *UAddOV = Builder.CreateZExt(Builder.CreateExtractValue(UAdd, 1),
552 Builder.getInt8Ty());
553 Value *Res = PoisonValue::get(RetTy);
554 Res = Builder.CreateInsertValue(Res, UAddOV, 0);
555 return Builder.CreateInsertValue(Res, UAddResult, 1);
556 }
557
558 return nullptr;
559}
560
561static Value *simplifyTernarylogic(const IntrinsicInst &II,
562 InstCombiner::BuilderTy &Builder) {
563
564 auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
565 if (!ArgImm || ArgImm->getValue().uge(256))
566 return nullptr;
567
568 Value *ArgA = II.getArgOperand(0);
569 Value *ArgB = II.getArgOperand(1);
570 Value *ArgC = II.getArgOperand(2);
571
572 Type *Ty = II.getType();
573
574 auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
575 return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
576 };
577 auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
578 return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
579 };
580 auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
581 return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
582 };
583 auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
584 return {Builder.CreateNot(V.first), ~V.second};
585 };
586 auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
587 auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
588 auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };
589
590 bool AIsConst = match(ArgA, PatternMatch::m_ImmConstant());
591 bool BIsConst = match(ArgB, PatternMatch::m_ImmConstant());
592 bool CIsConst = match(ArgC, PatternMatch::m_ImmConstant());
593
594 bool ABIsConst = AIsConst && BIsConst;
595 bool ACIsConst = AIsConst && CIsConst;
596 bool BCIsConst = BIsConst && CIsConst;
597 bool ABCIsConst = AIsConst && BIsConst && CIsConst;
598
599 // Use for verification. It's a big table. It's difficult to go from Imm ->
600 // logic ops, but easy to verify that a set of logic ops is correct. We track
601 // the logic ops through the second value in the pair. At the end it should
602 // equal Imm.
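// e.g. the truth tables of A, B and C are 0xf0, 0xcc and 0xaa, so for
// Imm == 0x80 the expression And(And(A, B), C) verifies as 0xf0 & 0xcc & 0xaa.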
603 std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
604 std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
605 std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
606 std::pair<Value *, uint8_t> Res = {nullptr, 0};
607
608 // Currently we only handle cases that convert directly to another instruction
609 // or cases where all the ops are constant. This is because we don't properly
610 // handle creating ternary ops in the backend, so splitting them here may
611 // cause regressions. As the backend improves, uncomment more cases.
612
613 uint8_t Imm = ArgImm->getValue().getZExtValue();
614 switch (Imm) {
615 case 0x0:
616 Res = {Constant::getNullValue(Ty), 0};
617 break;
618 case 0x1:
619 if (ABCIsConst)
620 Res = Nor(Or(A, B), C);
621 break;
622 case 0x2:
623 if (ABCIsConst)
624 Res = And(Nor(A, B), C);
625 break;
626 case 0x3:
627 if (ABIsConst)
628 Res = Nor(A, B);
629 break;
630 case 0x4:
631 if (ABCIsConst)
632 Res = And(Nor(A, C), B);
633 break;
634 case 0x5:
635 if (ACIsConst)
636 Res = Nor(A, C);
637 break;
638 case 0x6:
639 if (ABCIsConst)
640 Res = Nor(A, Xnor(B, C));
641 break;
642 case 0x7:
643 if (ABCIsConst)
644 Res = Nor(A, And(B, C));
645 break;
646 case 0x8:
647 if (ABCIsConst)
648 Res = Nor(A, Nand(B, C));
649 break;
650 case 0x9:
651 if (ABCIsConst)
652 Res = Nor(A, Xor(B, C));
653 break;
654 case 0xa:
655 if (ACIsConst)
656 Res = Nor(A, Not(C));
657 break;
658 case 0xb:
659 if (ABCIsConst)
660 Res = Nor(A, Nor(C, Not(B)));
661 break;
662 case 0xc:
663 if (ABIsConst)
664 Res = Nor(A, Not(B));
665 break;
666 case 0xd:
667 if (ABCIsConst)
668 Res = Nor(A, Nor(B, Not(C)));
669 break;
670 case 0xe:
671 if (ABCIsConst)
672 Res = Nor(A, Nor(B, C));
673 break;
674 case 0xf:
675 Res = Not(A);
676 break;
677 case 0x10:
678 if (ABCIsConst)
679 Res = And(A, Nor(B, C));
680 break;
681 case 0x11:
682 if (BCIsConst)
683 Res = Nor(B, C);
684 break;
685 case 0x12:
686 if (ABCIsConst)
687 Res = Nor(Xnor(A, C), B);
688 break;
689 case 0x13:
690 if (ABCIsConst)
691 Res = Nor(And(A, C), B);
692 break;
693 case 0x14:
694 if (ABCIsConst)
695 Res = Nor(Xnor(A, B), C);
696 break;
697 case 0x15:
698 if (ABCIsConst)
699 Res = Nor(And(A, B), C);
700 break;
701 case 0x16:
702 if (ABCIsConst)
703 Res = Xor(Xor(A, B), And(Nand(A, B), C));
704 break;
705 case 0x17:
706 if (ABCIsConst)
707 Res = Xor(Or(A, B), Or(Xnor(A, B), C));
708 break;
709 case 0x18:
710 if (ABCIsConst)
711 Res = Nor(Xnor(A, B), Xnor(A, C));
712 break;
713 case 0x19:
714 if (ABCIsConst)
715 Res = And(Nand(A, B), Xnor(B, C));
716 break;
717 case 0x1a:
718 if (ABCIsConst)
719 Res = Xor(A, Or(And(A, B), C));
720 break;
721 case 0x1b:
722 if (ABCIsConst)
723 Res = Xor(A, Or(Xnor(A, B), C));
724 break;
725 case 0x1c:
726 if (ABCIsConst)
727 Res = Xor(A, Or(And(A, C), B));
728 break;
729 case 0x1d:
730 if (ABCIsConst)
731 Res = Xor(A, Or(Xnor(A, C), B));
732 break;
733 case 0x1e:
734 if (ABCIsConst)
735 Res = Xor(A, Or(B, C));
736 break;
737 case 0x1f:
738 if (ABCIsConst)
739 Res = Nand(A, Or(B, C));
740 break;
741 case 0x20:
742 if (ABCIsConst)
743 Res = Nor(Nand(A, C), B);
744 break;
745 case 0x21:
746 if (ABCIsConst)
747 Res = Nor(Xor(A, C), B);
748 break;
749 case 0x22:
750 if (BCIsConst)
751 Res = Nor(B, Not(C));
752 break;
753 case 0x23:
754 if (ABCIsConst)
755 Res = Nor(B, Nor(C, Not(A)));
756 break;
757 case 0x24:
758 if (ABCIsConst)
759 Res = Nor(Xnor(A, B), Xor(A, C));
760 break;
761 case 0x25:
762 if (ABCIsConst)
763 Res = Xor(A, Nand(Nand(A, B), C));
764 break;
765 case 0x26:
766 if (ABCIsConst)
767 Res = And(Nand(A, B), Xor(B, C));
768 break;
769 case 0x27:
770 if (ABCIsConst)
771 Res = Xor(Or(Xnor(A, B), C), B);
772 break;
773 case 0x28:
774 if (ABCIsConst)
775 Res = And(Xor(A, B), C);
776 break;
777 case 0x29:
778 if (ABCIsConst)
779 Res = Xor(Xor(A, B), Nor(And(A, B), C));
780 break;
781 case 0x2a:
782 if (ABCIsConst)
783 Res = And(Nand(A, B), C);
784 break;
785 case 0x2b:
786 if (ABCIsConst)
787 Res = Xor(Or(Xnor(A, B), Xor(A, C)), A);
788 break;
789 case 0x2c:
790 if (ABCIsConst)
791 Res = Nor(Xnor(A, B), Nor(B, C));
792 break;
793 case 0x2d:
794 if (ABCIsConst)
795 Res = Xor(A, Or(B, Not(C)));
796 break;
797 case 0x2e:
798 if (ABCIsConst)
799 Res = Xor(A, Or(Xor(A, C), B));
800 break;
801 case 0x2f:
802 if (ABCIsConst)
803 Res = Nand(A, Or(B, Not(C)));
804 break;
805 case 0x30:
806 if (ABIsConst)
807 Res = Nor(B, Not(A));
808 break;
809 case 0x31:
810 if (ABCIsConst)
811 Res = Nor(Nor(A, Not(C)), B);
812 break;
813 case 0x32:
814 if (ABCIsConst)
815 Res = Nor(Nor(A, C), B);
816 break;
817 case 0x33:
818 Res = Not(B);
819 break;
820 case 0x34:
821 if (ABCIsConst)
822 Res = And(Xor(A, B), Nand(B, C));
823 break;
824 case 0x35:
825 if (ABCIsConst)
826 Res = Xor(B, Or(A, Xnor(B, C)));
827 break;
828 case 0x36:
829 if (ABCIsConst)
830 Res = Xor(Or(A, C), B);
831 break;
832 case 0x37:
833 if (ABCIsConst)
834 Res = Nand(Or(A, C), B);
835 break;
836 case 0x38:
837 if (ABCIsConst)
838 Res = Nor(Xnor(A, B), Nor(A, C));
839 break;
840 case 0x39:
841 if (ABCIsConst)
842 Res = Xor(Or(A, Not(C)), B);
843 break;
844 case 0x3a:
845 if (ABCIsConst)
846 Res = Xor(B, Or(A, Xor(B, C)));
847 break;
848 case 0x3b:
849 if (ABCIsConst)
850 Res = Nand(Or(A, Not(C)), B);
851 break;
852 case 0x3c:
853 Res = Xor(A, B);
854 break;
855 case 0x3d:
856 if (ABCIsConst)
857 Res = Xor(A, Or(Nor(A, C), B));
858 break;
859 case 0x3e:
860 if (ABCIsConst)
861 Res = Xor(A, Or(Nor(A, Not(C)), B));
862 break;
863 case 0x3f:
864 if (ABIsConst)
865 Res = Nand(A, B);
866 break;
867 case 0x40:
868 if (ABCIsConst)
869 Res = Nor(Nand(A, B), C);
870 break;
871 case 0x41:
872 if (ABCIsConst)
873 Res = Nor(Xor(A, B), C);
874 break;
875 case 0x42:
876 if (ABCIsConst)
877 Res = Nor(Xor(A, B), Xnor(A, C));
878 break;
879 case 0x43:
880 if (ABCIsConst)
881 Res = Xor(A, Nand(Nand(A, C), B));
882 break;
883 case 0x44:
884 if (BCIsConst)
885 Res = Nor(C, Not(B));
886 break;
887 case 0x45:
888 if (ABCIsConst)
889 Res = Nor(Nor(B, Not(A)), C);
890 break;
891 case 0x46:
892 if (ABCIsConst)
893 Res = Xor(Or(And(A, C), B), C);
894 break;
895 case 0x47:
896 if (ABCIsConst)
897 Res = Xor(Or(Xnor(A, C), B), C);
898 break;
899 case 0x48:
900 if (ABCIsConst)
901 Res = And(Xor(A, C), B);
902 break;
903 case 0x49:
904 if (ABCIsConst)
905 Res = Xor(Or(Xnor(A, B), And(A, C)), C);
906 break;
907 case 0x4a:
908 if (ABCIsConst)
909 Res = Nor(Xnor(A, C), Nor(B, C));
910 break;
911 case 0x4b:
912 if (ABCIsConst)
913 Res = Xor(A, Or(C, Not(B)));
914 break;
915 case 0x4c:
916 if (ABCIsConst)
917 Res = And(Nand(A, C), B);
918 break;
919 case 0x4d:
920 if (ABCIsConst)
921 Res = Xor(Or(Xor(A, B), Xnor(A, C)), A);
922 break;
923 case 0x4e:
924 if (ABCIsConst)
925 Res = Xor(A, Or(Xor(A, B), C));
926 break;
927 case 0x4f:
928 if (ABCIsConst)
929 Res = Nand(A, Nand(B, Not(C)));
930 break;
931 case 0x50:
932 if (ACIsConst)
933 Res = Nor(C, Not(A));
934 break;
935 case 0x51:
936 if (ABCIsConst)
937 Res = Nor(Nor(A, Not(B)), C);
938 break;
939 case 0x52:
940 if (ABCIsConst)
941 Res = And(Xor(A, C), Nand(B, C));
942 break;
943 case 0x53:
944 if (ABCIsConst)
945 Res = Xor(Or(Xnor(B, C), A), C);
946 break;
947 case 0x54:
948 if (ABCIsConst)
949 Res = Nor(Nor(A, B), C);
950 break;
951 case 0x55:
952 Res = Not(C);
953 break;
954 case 0x56:
955 if (ABCIsConst)
956 Res = Xor(Or(A, B), C);
957 break;
958 case 0x57:
959 if (ABCIsConst)
960 Res = Nand(Or(A, B), C);
961 break;
962 case 0x58:
963 if (ABCIsConst)
964 Res = Nor(Nor(A, B), Xnor(A, C));
965 break;
966 case 0x59:
967 if (ABCIsConst)
968 Res = Xor(Or(A, Not(B)), C);
969 break;
970 case 0x5a:
971 Res = Xor(A, C);
972 break;
973 case 0x5b:
974 if (ABCIsConst)
975 Res = Xor(A, Or(Nor(A, B), C));
976 break;
977 case 0x5c:
978 if (ABCIsConst)
979 Res = Xor(Or(Xor(B, C), A), C);
980 break;
981 case 0x5d:
982 if (ABCIsConst)
983 Res = Nand(Or(A, Not(B)), C);
984 break;
985 case 0x5e:
986 if (ABCIsConst)
987 Res = Xor(A, Or(Nor(A, Not(B)), C));
988 break;
989 case 0x5f:
990 if (ACIsConst)
991 Res = Nand(A, C);
992 break;
993 case 0x60:
994 if (ABCIsConst)
995 Res = And(A, Xor(B, C));
996 break;
997 case 0x61:
998 if (ABCIsConst)
999 Res = Xor(Or(Xnor(A, B), And(B, C)), C);
1000 break;
1001 case 0x62:
1002 if (ABCIsConst)
1003 Res = Nor(Nor(A, C), Xnor(B, C));
1004 break;
1005 case 0x63:
1006 if (ABCIsConst)
1007 Res = Xor(B, Or(C, Not(A)));
1008 break;
1009 case 0x64:
1010 if (ABCIsConst)
1011 Res = Nor(Nor(A, B), Xnor(B, C));
1012 break;
1013 case 0x65:
1014 if (ABCIsConst)
1015 Res = Xor(Or(B, Not(A)), C);
1016 break;
1017 case 0x66:
1018 Res = Xor(B, C);
1019 break;
1020 case 0x67:
1021 if (ABCIsConst)
1022 Res = Or(Nor(A, B), Xor(B, C));
1023 break;
1024 case 0x68:
1025 if (ABCIsConst)
1026 Res = Xor(Xor(A, B), Nor(Nor(A, B), C));
1027 break;
1028 case 0x69:
1029 if (ABCIsConst)
1030 Res = Xor(Xnor(A, B), C);
1031 break;
1032 case 0x6a:
1033 if (ABCIsConst)
1034 Res = Xor(And(A, B), C);
1035 break;
1036 case 0x6b:
1037 if (ABCIsConst)
1038 Res = Or(Nor(A, B), Xor(Xnor(A, B), C));
1039 break;
1040 case 0x6c:
1041 if (ABCIsConst)
1042 Res = Xor(And(A, C), B);
1043 break;
1044 case 0x6d:
1045 if (ABCIsConst)
1046 Res = Xor(Or(Xnor(A, B), Nor(A, C)), C);
1047 break;
1048 case 0x6e:
1049 if (ABCIsConst)
1050 Res = Or(Nor(A, Not(B)), Xor(B, C));
1051 break;
1052 case 0x6f:
1053 if (ABCIsConst)
1054 Res = Nand(A, Xnor(B, C));
1055 break;
1056 case 0x70:
1057 if (ABCIsConst)
1058 Res = And(A, Nand(B, C));
1059 break;
1060 case 0x71:
1061 if (ABCIsConst)
1062 Res = Xor(Nor(Xor(A, B), Xor(A, C)), A);
1063 break;
1064 case 0x72:
1065 if (ABCIsConst)
1066 Res = Xor(Or(Xor(A, B), C), B);
1067 break;
1068 case 0x73:
1069 if (ABCIsConst)
1070 Res = Nand(Nand(A, Not(C)), B);
1071 break;
1072 case 0x74:
1073 if (ABCIsConst)
1074 Res = Xor(Or(Xor(A, C), B), C);
1075 break;
1076 case 0x75:
1077 if (ABCIsConst)
1078 Res = Nand(Nand(A, Not(B)), C);
1079 break;
1080 case 0x76:
1081 if (ABCIsConst)
1082 Res = Xor(B, Or(Nor(B, Not(A)), C));
1083 break;
1084 case 0x77:
1085 if (BCIsConst)
1086 Res = Nand(B, C);
1087 break;
1088 case 0x78:
1089 if (ABCIsConst)
1090 Res = Xor(A, And(B, C));
1091 break;
1092 case 0x79:
1093 if (ABCIsConst)
1094 Res = Xor(Or(Xnor(A, B), Nor(B, C)), C);
1095 break;
1096 case 0x7a:
1097 if (ABCIsConst)
1098 Res = Or(Xor(A, C), Nor(B, Not(A)));
1099 break;
1100 case 0x7b:
1101 if (ABCIsConst)
1102 Res = Nand(Xnor(A, C), B);
1103 break;
1104 case 0x7c:
1105 if (ABCIsConst)
1106 Res = Or(Xor(A, B), Nor(C, Not(A)));
1107 break;
1108 case 0x7d:
1109 if (ABCIsConst)
1110 Res = Nand(Xnor(A, B), C);
1111 break;
1112 case 0x7e:
1113 if (ABCIsConst)
1114 Res = Or(Xor(A, B), Xor(A, C));
1115 break;
1116 case 0x7f:
1117 if (ABCIsConst)
1118 Res = Nand(And(A, B), C);
1119 break;
1120 case 0x80:
1121 if (ABCIsConst)
1122 Res = And(And(A, B), C);
1123 break;
1124 case 0x81:
1125 if (ABCIsConst)
1126 Res = Nor(Xor(A, B), Xor(A, C));
1127 break;
1128 case 0x82:
1129 if (ABCIsConst)
1130 Res = And(Xnor(A, B), C);
1131 break;
1132 case 0x83:
1133 if (ABCIsConst)
1134 Res = Nor(Xor(A, B), Nor(C, Not(A)));
1135 break;
1136 case 0x84:
1137 if (ABCIsConst)
1138 Res = And(Xnor(A, C), B);
1139 break;
1140 case 0x85:
1141 if (ABCIsConst)
1142 Res = Nor(Xor(A, C), Nor(B, Not(A)));
1143 break;
1144 case 0x86:
1145 if (ABCIsConst)
1146 Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
1147 break;
1148 case 0x87:
1149 if (ABCIsConst)
1150 Res = Xor(A, Nand(B, C));
1151 break;
1152 case 0x88:
1153 Res = And(B, C);
1154 break;
1155 case 0x89:
1156 if (ABCIsConst)
1157 Res = Xor(B, Nor(Nor(B, Not(A)), C));
1158 break;
1159 case 0x8a:
1160 if (ABCIsConst)
1161 Res = And(Nand(A, Not(B)), C);
1162 break;
1163 case 0x8b:
1164 if (ABCIsConst)
1165 Res = Xor(Nor(Xor(A, C), B), C);
1166 break;
1167 case 0x8c:
1168 if (ABCIsConst)
1169 Res = And(Nand(A, Not(C)), B);
1170 break;
1171 case 0x8d:
1172 if (ABCIsConst)
1173 Res = Xor(Nor(Xor(A, B), C), B);
1174 break;
1175 case 0x8e:
1176 if (ABCIsConst)
1177 Res = Xor(Or(Xor(A, B), Xor(A, C)), A);
1178 break;
1179 case 0x8f:
1180 if (ABCIsConst)
1181 Res = Nand(A, Nand(B, C));
1182 break;
1183 case 0x90:
1184 if (ABCIsConst)
1185 Res = And(A, Xnor(B, C));
1186 break;
1187 case 0x91:
1188 if (ABCIsConst)
1189 Res = Nor(Nor(A, Not(B)), Xor(B, C));
1190 break;
1191 case 0x92:
1192 if (ABCIsConst)
1193 Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
1194 break;
1195 case 0x93:
1196 if (ABCIsConst)
1197 Res = Xor(Nand(A, C), B);
1198 break;
1199 case 0x94:
1200 if (ABCIsConst)
1201 Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
1202 break;
1203 case 0x95:
1204 if (ABCIsConst)
1205 Res = Xor(Nand(A, B), C);
1206 break;
1207 case 0x96:
1208 if (ABCIsConst)
1209 Res = Xor(Xor(A, B), C);
1210 break;
1211 case 0x97:
1212 if (ABCIsConst)
1213 Res = Xor(Xor(A, B), Or(Nor(A, B), C));
1214 break;
1215 case 0x98:
1216 if (ABCIsConst)
1217 Res = Nor(Nor(A, B), Xor(B, C));
1218 break;
1219 case 0x99:
1220 if (BCIsConst)
1221 Res = Xnor(B, C);
1222 break;
1223 case 0x9a:
1224 if (ABCIsConst)
1225 Res = Xor(Nor(B, Not(A)), C);
1226 break;
1227 case 0x9b:
1228 if (ABCIsConst)
1229 Res = Or(Nor(A, B), Xnor(B, C));
1230 break;
1231 case 0x9c:
1232 if (ABCIsConst)
1233 Res = Xor(B, Nor(C, Not(A)));
1234 break;
1235 case 0x9d:
1236 if (ABCIsConst)
1237 Res = Or(Nor(A, C), Xnor(B, C));
1238 break;
1239 case 0x9e:
1240 if (ABCIsConst)
1241 Res = Xor(And(Xor(A, B), Nand(B, C)), C);
1242 break;
1243 case 0x9f:
1244 if (ABCIsConst)
1245 Res = Nand(A, Xor(B, C));
1246 break;
1247 case 0xa0:
1248 Res = And(A, C);
1249 break;
1250 case 0xa1:
1251 if (ABCIsConst)
1252 Res = Xor(A, Nor(Nor(A, Not(B)), C));
1253 break;
1254 case 0xa2:
1255 if (ABCIsConst)
1256 Res = And(Or(A, Not(B)), C);
1257 break;
1258 case 0xa3:
1259 if (ABCIsConst)
1260 Res = Xor(Nor(Xor(B, C), A), C);
1261 break;
1262 case 0xa4:
1263 if (ABCIsConst)
1264 Res = Xor(A, Nor(Nor(A, B), C));
1265 break;
1266 case 0xa5:
1267 if (ACIsConst)
1268 Res = Xnor(A, C);
1269 break;
1270 case 0xa6:
1271 if (ABCIsConst)
1272 Res = Xor(Nor(A, Not(B)), C);
1273 break;
1274 case 0xa7:
1275 if (ABCIsConst)
1276 Res = Or(Nor(A, B), Xnor(A, C));
1277 break;
1278 case 0xa8:
1279 if (ABCIsConst)
1280 Res = And(Or(A, B), C);
1281 break;
1282 case 0xa9:
1283 if (ABCIsConst)
1284 Res = Xor(Nor(A, B), C);
1285 break;
1286 case 0xaa:
1287 Res = C;
1288 break;
1289 case 0xab:
1290 if (ABCIsConst)
1291 Res = Or(Nor(A, B), C);
1292 break;
1293 case 0xac:
1294 if (ABCIsConst)
1295 Res = Xor(Nor(Xnor(B, C), A), C);
1296 break;
1297 case 0xad:
1298 if (ABCIsConst)
1299 Res = Or(Xnor(A, C), And(B, C));
1300 break;
1301 case 0xae:
1302 if (ABCIsConst)
1303 Res = Or(Nor(A, Not(B)), C);
1304 break;
1305 case 0xaf:
1306 if (ACIsConst)
1307 Res = Or(C, Not(A));
1308 break;
1309 case 0xb0:
1310 if (ABCIsConst)
1311 Res = And(A, Nand(B, Not(C)));
1312 break;
1313 case 0xb1:
1314 if (ABCIsConst)
1315 Res = Xor(A, Nor(Xor(A, B), C));
1316 break;
1317 case 0xb2:
1318 if (ABCIsConst)
1319 Res = Xor(Nor(Xor(A, B), Xnor(A, C)), A);
1320 break;
1321 case 0xb3:
1322 if (ABCIsConst)
1323 Res = Nand(Nand(A, C), B);
1324 break;
1325 case 0xb4:
1326 if (ABCIsConst)
1327 Res = Xor(A, Nor(C, Not(B)));
1328 break;
1329 case 0xb5:
1330 if (ABCIsConst)
1331 Res = Or(Xnor(A, C), Nor(B, C));
1332 break;
1333 case 0xb6:
1334 if (ABCIsConst)
1335 Res = Xor(And(Xor(A, B), Nand(A, C)), C);
1336 break;
1337 case 0xb7:
1338 if (ABCIsConst)
1339 Res = Nand(Xor(A, C), B);
1340 break;
1341 case 0xb8:
1342 if (ABCIsConst)
1343 Res = Xor(Nor(Xnor(A, C), B), C);
1344 break;
1345 case 0xb9:
1346 if (ABCIsConst)
1347 Res = Xor(Nor(And(A, C), B), C);
1348 break;
1349 case 0xba:
1350 if (ABCIsConst)
1351 Res = Or(Nor(B, Not(A)), C);
1352 break;
1353 case 0xbb:
1354 if (BCIsConst)
1355 Res = Or(C, Not(B));
1356 break;
1357 case 0xbc:
1358 if (ABCIsConst)
1359 Res = Xor(A, And(Nand(A, C), B));
1360 break;
1361 case 0xbd:
1362 if (ABCIsConst)
1363 Res = Or(Xor(A, B), Xnor(A, C));
1364 break;
1365 case 0xbe:
1366 if (ABCIsConst)
1367 Res = Or(Xor(A, B), C);
1368 break;
1369 case 0xbf:
1370 if (ABCIsConst)
1371 Res = Or(Nand(A, B), C);
1372 break;
1373 case 0xc0:
1374 Res = And(A, B);
1375 break;
1376 case 0xc1:
1377 if (ABCIsConst)
1378 Res = Xor(A, Nor(Nor(A, Not(C)), B));
1379 break;
1380 case 0xc2:
1381 if (ABCIsConst)
1382 Res = Xor(A, Nor(Nor(A, C), B));
1383 break;
1384 case 0xc3:
1385 if (ABIsConst)
1386 Res = Xnor(A, B);
1387 break;
1388 case 0xc4:
1389 if (ABCIsConst)
1390 Res = And(Or(A, Not(C)), B);
1391 break;
1392 case 0xc5:
1393 if (ABCIsConst)
1394 Res = Xor(B, Nor(A, Xor(B, C)));
1395 break;
1396 case 0xc6:
1397 if (ABCIsConst)
1398 Res = Xor(Nor(A, Not(C)), B);
1399 break;
1400 case 0xc7:
1401 if (ABCIsConst)
1402 Res = Or(Xnor(A, B), Nor(A, C));
1403 break;
1404 case 0xc8:
1405 if (ABCIsConst)
1406 Res = And(Or(A, C), B);
1407 break;
1408 case 0xc9:
1409 if (ABCIsConst)
1410 Res = Xor(Nor(A, C), B);
1411 break;
1412 case 0xca:
1413 if (ABCIsConst)
1414 Res = Xor(B, Nor(A, Xnor(B, C)));
1415 break;
1416 case 0xcb:
1417 if (ABCIsConst)
1418 Res = Or(Xnor(A, B), And(B, C));
1419 break;
1420 case 0xcc:
1421 Res = B;
1422 break;
1423 case 0xcd:
1424 if (ABCIsConst)
1425 Res = Or(Nor(A, C), B);
1426 break;
1427 case 0xce:
1428 if (ABCIsConst)
1429 Res = Or(Nor(A, Not(C)), B);
1430 break;
1431 case 0xcf:
1432 if (ABIsConst)
1433 Res = Or(B, Not(A));
1434 break;
1435 case 0xd0:
1436 if (ABCIsConst)
1437 Res = And(A, Or(B, Not(C)));
1438 break;
1439 case 0xd1:
1440 if (ABCIsConst)
1441 Res = Xor(A, Nor(Xor(A, C), B));
1442 break;
1443 case 0xd2:
1444 if (ABCIsConst)
1445 Res = Xor(A, Nor(B, Not(C)));
1446 break;
1447 case 0xd3:
1448 if (ABCIsConst)
1449 Res = Or(Xnor(A, B), Nor(B, C));
1450 break;
1451 case 0xd4:
1452 if (ABCIsConst)
1453 Res = Xor(Nor(Xnor(A, B), Xor(A, C)), A);
1454 break;
1455 case 0xd5:
1456 if (ABCIsConst)
1457 Res = Nand(Nand(A, B), C);
1458 break;
1459 case 0xd6:
1460 if (ABCIsConst)
1461 Res = Xor(Xor(A, B), Or(And(A, B), C));
1462 break;
1463 case 0xd7:
1464 if (ABCIsConst)
1465 Res = Nand(Xor(A, B), C);
1466 break;
1467 case 0xd8:
1468 if (ABCIsConst)
1469 Res = Xor(Nor(Xnor(A, B), C), B);
1470 break;
1471 case 0xd9:
1472 if (ABCIsConst)
1473 Res = Or(And(A, B), Xnor(B, C));
1474 break;
1475 case 0xda:
1476 if (ABCIsConst)
1477 Res = Xor(A, And(Nand(A, B), C));
1478 break;
1479 case 0xdb:
1480 if (ABCIsConst)
1481 Res = Or(Xnor(A, B), Xor(A, C));
1482 break;
1483 case 0xdc:
1484 if (ABCIsConst)
1485 Res = Or(B, Nor(C, Not(A)));
1486 break;
1487 case 0xdd:
1488 if (BCIsConst)
1489 Res = Or(B, Not(C));
1490 break;
1491 case 0xde:
1492 if (ABCIsConst)
1493 Res = Or(Xor(A, C), B);
1494 break;
1495 case 0xdf:
1496 if (ABCIsConst)
1497 Res = Or(Nand(A, C), B);
1498 break;
1499 case 0xe0:
1500 if (ABCIsConst)
1501 Res = And(A, Or(B, C));
1502 break;
1503 case 0xe1:
1504 if (ABCIsConst)
1505 Res = Xor(A, Nor(B, C));
1506 break;
1507 case 0xe2:
1508 if (ABCIsConst)
1509 Res = Xor(A, Nor(Xnor(A, C), B));
1510 break;
1511 case 0xe3:
1512 if (ABCIsConst)
1513 Res = Xor(A, Nor(And(A, C), B));
1514 break;
1515 case 0xe4:
1516 if (ABCIsConst)
1517 Res = Xor(A, Nor(Xnor(A, B), C));
1518 break;
1519 case 0xe5:
1520 if (ABCIsConst)
1521 Res = Xor(A, Nor(And(A, B), C));
1522 break;
1523 case 0xe6:
1524 if (ABCIsConst)
1525 Res = Or(And(A, B), Xor(B, C));
1526 break;
1527 case 0xe7:
1528 if (ABCIsConst)
1529 Res = Or(Xnor(A, B), Xnor(A, C));
1530 break;
1531 case 0xe8:
1532 if (ABCIsConst)
1533 Res = Xor(Or(A, B), Nor(Xnor(A, B), C));
1534 break;
1535 case 0xe9:
1536 if (ABCIsConst)
1537 Res = Xor(Xor(A, B), Nand(Nand(A, B), C));
1538 break;
1539 case 0xea:
1540 if (ABCIsConst)
1541 Res = Or(And(A, B), C);
1542 break;
1543 case 0xeb:
1544 if (ABCIsConst)
1545 Res = Or(Xnor(A, B), C);
1546 break;
1547 case 0xec:
1548 if (ABCIsConst)
1549 Res = Or(And(A, C), B);
1550 break;
1551 case 0xed:
1552 if (ABCIsConst)
1553 Res = Or(Xnor(A, C), B);
1554 break;
1555 case 0xee:
1556 Res = Or(B, C);
1557 break;
1558 case 0xef:
1559 if (ABCIsConst)
1560 Res = Nand(A, Nor(B, C));
1561 break;
1562 case 0xf0:
1563 Res = A;
1564 break;
1565 case 0xf1:
1566 if (ABCIsConst)
1567 Res = Or(A, Nor(B, C));
1568 break;
1569 case 0xf2:
1570 if (ABCIsConst)
1571 Res = Or(A, Nor(B, Not(C)));
1572 break;
1573 case 0xf3:
1574 if (ABIsConst)
1575 Res = Or(A, Not(B));
1576 break;
1577 case 0xf4:
1578 if (ABCIsConst)
1579 Res = Or(A, Nor(C, Not(B)));
1580 break;
1581 case 0xf5:
1582 if (ACIsConst)
1583 Res = Or(A, Not(C));
1584 break;
1585 case 0xf6:
1586 if (ABCIsConst)
1587 Res = Or(A, Xor(B, C));
1588 break;
1589 case 0xf7:
1590 if (ABCIsConst)
1591 Res = Or(A, Nand(B, C));
1592 break;
1593 case 0xf8:
1594 if (ABCIsConst)
1595 Res = Or(A, And(B, C));
1596 break;
1597 case 0xf9:
1598 if (ABCIsConst)
1599 Res = Or(A, Xnor(B, C));
1600 break;
1601 case 0xfa:
1602 Res = Or(A, C);
1603 break;
1604 case 0xfb:
1605 if (ABCIsConst)
1606 Res = Nand(Nor(A, C), B);
1607 break;
1608 case 0xfc:
1609 Res = Or(A, B);
1610 break;
1611 case 0xfd:
1612 if (ABCIsConst)
1613 Res = Nand(Nor(A, B), C);
1614 break;
1615 case 0xfe:
1616 if (ABCIsConst)
1617 Res = Or(Or(A, B), C);
1618 break;
1619 case 0xff:
1620 Res = {Constant::getAllOnesValue(Ty), 0xff};
1621 break;
1622 }
1623
1624 assert((Res.first == nullptr || Res.second == Imm) &&
1625 "Simplification of ternary logic does not verify!");
1626 return Res.first;
1627}
1628
1629static Value *simplifyX86insertps(const IntrinsicInst &II,
1630 InstCombiner::BuilderTy &Builder) {
1631 auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
1632 if (!CInt)
1633 return nullptr;
1634
1635 auto *VecTy = cast<FixedVectorType>(II.getType());
1636 assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
1637
1638 // The immediate permute control byte looks like this:
1639 // [3:0] - zero mask for each 32-bit lane
1640 // [5:4] - select one 32-bit destination lane
1641 // [7:6] - select one 32-bit source lane
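// e.g. Imm == 0x50 (SourceLane 1, DestLane 1, no zeroing) becomes the shuffle
// mask <0, 5, 2, 3>, taking lane 1 of the second operand into lane 1.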
1642
1643 uint8_t Imm = CInt->getZExtValue();
1644 uint8_t ZMask = Imm & 0xf;
1645 uint8_t DestLane = (Imm >> 4) & 0x3;
1646 uint8_t SourceLane = (Imm >> 6) & 0x3;
1647
1648 ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
1649
1650 // If all zero mask bits are set, this was just a weird way to
1651 // generate a zero vector.
1652 if (ZMask == 0xf)
1653 return ZeroVector;
1654
1655 // Initialize by passing all of the first source bits through.
1656 int ShuffleMask[4] = {0, 1, 2, 3};
1657
1658 // We may replace the second operand with the zero vector.
1659 Value *V1 = II.getArgOperand(1);
1660
1661 if (ZMask) {
1662 // If the zero mask is being used with a single input or the zero mask
1663 // overrides the destination lane, this is a shuffle with the zero vector.
1664 if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
1665 (ZMask & (1 << DestLane))) {
1666 V1 = ZeroVector;
1667 // We may still move 32-bits of the first source vector from one lane
1668 // to another.
1669 ShuffleMask[DestLane] = SourceLane;
1670 // The zero mask may override the previous insert operation.
1671 for (unsigned i = 0; i < 4; ++i)
1672 if ((ZMask >> i) & 0x1)
1673 ShuffleMask[i] = i + 4;
1674 } else {
1675 // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
1676 return nullptr;
1677 }
1678 } else {
1679 // Replace the selected destination lane with the selected source lane.
1680 ShuffleMask[DestLane] = SourceLane + 4;
1681 }
1682
1683 return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
1684}
1685
1686/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
1687/// or conversion to a shuffle vector.
1688static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
1689 ConstantInt *CILength, ConstantInt *CIIndex,
1690 InstCombiner::BuilderTy &Builder) {
1691 auto LowConstantHighUndef = [&](uint64_t Val) {
1692 Type *IntTy64 = Type::getInt64Ty(II.getContext());
1693 Constant *Args[] = {ConstantInt::get(IntTy64, Val),
1694 UndefValue::get(IntTy64)};
1695 return ConstantVector::get(Args);
1696 };
1697
1698 // See if we're dealing with constant values.
1699 auto *C0 = dyn_cast<Constant>(Op0);
1700 auto *CI0 =
1701 C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
1702 : nullptr;
1703
1704 // Attempt to constant fold.
1705 if (CILength && CIIndex) {
1706 // From AMD documentation: "The bit index and field length are each six
1707 // bits in length other bits of the field are ignored."
1708 APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
1709 APInt APLength = CILength->getValue().zextOrTrunc(6);
1710
1711 unsigned Index = APIndex.getZExtValue();
1712
1713 // From AMD documentation: "a value of zero in the field length is
1714 // defined as length of 64".
1715 unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
1716
1717 // From AMD documentation: "If the sum of the bit index + length field
1718 // is greater than 64, the results are undefined".
1719 unsigned End = Index + Length;
1720
1721 // Note that both field index and field length are 8-bit quantities.
1722 // Since variables 'Index' and 'Length' are unsigned values
1723 // obtained from zero-extending field index and field length
1724 // respectively, their sum should never wrap around.
1725 if (End > 64)
1726 return UndefValue::get(II.getType());
1727
1728 // If we are inserting whole bytes, we can convert this to a shuffle.
1729 // Lowering can recognize EXTRQI shuffle masks.
1730 if ((Length % 8) == 0 && (Index % 8) == 0) {
1731 // Convert bit indices to byte indices.
1732 Length /= 8;
1733 Index /= 8;
1734
1735 Type *IntTy8 = Type::getInt8Ty(II.getContext());
1736 auto *ShufTy = FixedVectorType::get(IntTy8, 16);
1737
1738 SmallVector<int, 16> ShuffleMask;
1739 for (int i = 0; i != (int)Length; ++i)
1740 ShuffleMask.push_back(i + Index);
1741 for (int i = Length; i != 8; ++i)
1742 ShuffleMask.push_back(i + 16);
1743 for (int i = 8; i != 16; ++i)
1744 ShuffleMask.push_back(-1);
1745
1746 Value *SV = Builder.CreateShuffleVector(
1747 Builder.CreateBitCast(Op0, ShufTy),
1748 ConstantAggregateZero::get(ShufTy), ShuffleMask);
1749 return Builder.CreateBitCast(SV, II.getType());
1750 }
1751
1752 // Constant Fold - shift Index'th bit to lowest position and mask off
1753 // Length bits.
1754 if (CI0) {
1755 APInt Elt = CI0->getValue();
1756 Elt.lshrInPlace(Index);
1757 Elt = Elt.zextOrTrunc(Length);
1758 return LowConstantHighUndef(Elt.getZExtValue());
1759 }
1760
1761 // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
1762 if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
1763 Value *Args[] = {Op0, CILength, CIIndex};
1764 Module *M = II.getModule();
1765 Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
1766 return Builder.CreateCall(F, Args);
1767 }
1768 }
1769
1770 // Constant Fold - extraction from zero is always {zero, undef}.
1771 if (CI0 && CI0->isZero())
1772 return LowConstantHighUndef(0);
1773
1774 return nullptr;
1775}
1776
1777/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
1778/// folding or conversion to a shuffle vector.
1779static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
1780 APInt APLength, APInt APIndex,
1781 InstCombiner::BuilderTy &Builder) {
1782 // From AMD documentation: "The bit index and field length are each six bits
1783 // in length other bits of the field are ignored."
1784 APIndex = APIndex.zextOrTrunc(6);
1785 APLength = APLength.zextOrTrunc(6);
1786
1787 // Attempt to constant fold.
1788 unsigned Index = APIndex.getZExtValue();
1789
1790 // From AMD documentation: "a value of zero in the field length is
1791 // defined as length of 64".
1792 unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
1793
1794 // From AMD documentation: "If the sum of the bit index + length field
1795 // is greater than 64, the results are undefined".
1796 unsigned End = Index + Length;
1797
1798 // Note that both field index and field length are 8-bit quantities.
1799 // Since variables 'Index' and 'Length' are unsigned values
1800 // obtained from zero-extending field index and field length
1801 // respectively, their sum should never wrap around.
1802 if (End > 64)
1803 return UndefValue::get(II.getType());
1804
1805 // If we are inserting whole bytes, we can convert this to a shuffle.
1806 // Lowering can recognize INSERTQI shuffle masks.
1807 if ((Length % 8) == 0 && (Index % 8) == 0) {
1808 // Convert bit indices to byte indices.
1809 Length /= 8;
1810 Index /= 8;
1811
1812 Type *IntTy8 = Type::getInt8Ty(II.getContext());
1813 auto *ShufTy = FixedVectorType::get(IntTy8, 16);
1814
1815 SmallVector<int, 16> ShuffleMask;
1816 for (int i = 0; i != (int)Index; ++i)
1817 ShuffleMask.push_back(i);
1818 for (int i = 0; i != (int)Length; ++i)
1819 ShuffleMask.push_back(i + 16);
1820 for (int i = Index + Length; i != 8; ++i)
1821 ShuffleMask.push_back(i);
1822 for (int i = 8; i != 16; ++i)
1823 ShuffleMask.push_back(-1);
1824
1825 Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
1826 Builder.CreateBitCast(Op1, ShufTy),
1827 ShuffleMask);
1828 return Builder.CreateBitCast(SV, II.getType());
1829 }
1830
1831 // See if we're dealing with constant values.
1832 auto *C0 = dyn_cast<Constant>(Op0);
1833 auto *C1 = dyn_cast<Constant>(Op1);
1834 auto *CI00 =
1835 C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
1836 : nullptr;
1837 auto *CI10 =
1838 C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
1839 : nullptr;
1840
1841 // Constant Fold - insert bottom Length bits starting at the Index'th bit.
1842 if (CI00 && CI10) {
1843 APInt V00 = CI00->getValue();
1844 APInt V10 = CI10->getValue();
1845 APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
1846 V00 = V00 & ~Mask;
1847 V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
1848 APInt Val = V00 | V10;
1849 Type *IntTy64 = Type::getInt64Ty(II.getContext());
1850 Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
1851 UndefValue::get(IntTy64)};
1852 return ConstantVector::get(Args);
1853 }
1854
1855 // If we were an INSERTQ call, we'll save demanded elements if we convert to
1856 // INSERTQI.
1857 if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
1858 Type *IntTy8 = Type::getInt8Ty(II.getContext());
1859 Constant *CILength = ConstantInt::get(IntTy8, Length, false);
1860 Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
1861
1862 Value *Args[] = {Op0, Op1, CILength, CIIndex};
1863 Module *M = II.getModule();
1864 Function *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
1865 return Builder.CreateCall(F, Args);
1866 }
1867
1868 return nullptr;
1869}
1870
1871/// Attempt to convert pshufb* to shufflevector if the mask is constant.
1872static Value *simplifyX86pshufb(const IntrinsicInst &II,
1873 InstCombiner::BuilderTy &Builder) {
1874 auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1875 if (!V)
1876 return nullptr;
1877
1878 auto *VecTy = cast<FixedVectorType>(II.getType());
1879 unsigned NumElts = VecTy->getNumElements();
1880 assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
1881 "Unexpected number of elements in shuffle mask!");
1882
1883 // Construct a shuffle mask from constant integers or UNDEFs.
1884 int Indexes[64];
1885
1886 // Each byte in the shuffle control mask forms an index to permute the
1887 // corresponding byte in the destination operand.
1888 for (unsigned I = 0; I < NumElts; ++I) {
1889 Constant *COp = V->getAggregateElement(I);
1890 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1891 return nullptr;
1892
1893 if (isa<UndefValue>(COp)) {
1894 Indexes[I] = -1;
1895 continue;
1896 }
1897
1898 int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
1899
1900 // If the most significant bit (bit[7]) of each byte of the shuffle
1901 // control mask is set, then zero is written in the result byte.
1902 // The zero vector is in the right-hand side of the resulting
1903 // shufflevector.
1904
1905 // The value of each index for the high 128-bit lane is the least
1906 // significant 4 bits of the respective shuffle control byte.
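// e.g. a control byte of 0x83 (bit 7 set) selects zero, while 0x03 copies
// byte 3 from within the same 128-bit lane.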
1907 Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1908 Indexes[I] = Index;
1909 }
1910
1911 auto V1 = II.getArgOperand(0);
1912 auto V2 = Constant::getNullValue(VecTy);
1913 return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
1914}
1915
1916/// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
1917static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
1918 InstCombiner::BuilderTy &Builder) {
1919 auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1920 if (!V)
1921 return nullptr;
1922
1923 auto *VecTy = cast<FixedVectorType>(II.getType());
1924 unsigned NumElts = VecTy->getNumElements();
1925 bool IsPD = VecTy->getScalarType()->isDoubleTy();
1926 unsigned NumLaneElts = IsPD ? 2 : 4;
1927 assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1928
1929 // Construct a shuffle mask from constant integers or UNDEFs.
1930 int Indexes[16];
1931
1932 // The intrinsics only read one or two bits, clear the rest.
1933 for (unsigned I = 0; I < NumElts; ++I) {
1934 Constant *COp = V->getAggregateElement(I);
1935 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1936 return nullptr;
1937
1938 if (isa<UndefValue>(COp)) {
1939 Indexes[I] = -1;
1940 continue;
1941 }
1942
1943 APInt Index = cast<ConstantInt>(COp)->getValue();
1944 Index = Index.zextOrTrunc(32).getLoBits(2);
1945
1946 // The PD variants use bit 1 to select the per-lane element index, so
1947 // shift down to convert to generic shuffle mask index.
1948 if (IsPD)
1949 Index.lshrInPlace(1);
1950
1951 // The _256 variants are a bit trickier since the mask bits always index
1952 // into the corresponding 128-bit half. In order to convert to a generic
1953 // shuffle, we have to make that explicit.
1954 Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1955
1956 Indexes[I] = Index.getZExtValue();
1957 }
1958
1959 auto V1 = II.getArgOperand(0);
1960 return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, NumElts));
1961}
1962
1963/// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
1964static Value *simplifyX86vpermv(const IntrinsicInst &II,
1965 InstCombiner::BuilderTy &Builder) {
1966 auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1967 if (!V)
1968 return nullptr;
1969
1970 auto *VecTy = cast<FixedVectorType>(II.getType());
1971 unsigned Size = VecTy->getNumElements();
1972 assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1973 "Unexpected shuffle mask size");
1974
1975 // Construct a shuffle mask from constant integers or UNDEFs.
1976 int Indexes[64];
1977
1978 for (unsigned I = 0; I < Size; ++I) {
1979 Constant *COp = V->getAggregateElement(I);
1980 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1981 return nullptr;
1982
1983 if (isa<UndefValue>(COp)) {
1984 Indexes[I] = -1;
1985 continue;
1986 }
1987
1988 uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1989 Index &= Size - 1;
1990 Indexes[I] = Index;
1991 }
1992
1993 auto V1 = II.getArgOperand(0);
1994 return Builder.CreateShuffleVector(V1, ArrayRef(Indexes, Size));
1995}
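// Illustrative sketch (plain C++, not from this file): vpermd/vpermps control
// values wrap modulo the element count (a power of two per the assert above),
// so a <8 x i32> control of {9, 8, 0, ...} acts like shuffle indices
// {1, 0, 0, ...}, which is all the fold above needs for a shufflevector mask.
#include <cstdint>
#include <vector>
static std::vector<int> vpermvShuffleMask(const std::vector<uint32_t> &Ctrl) {
  std::vector<int> Mask(Ctrl.size());
  for (unsigned I = 0; I < Ctrl.size(); ++I)
    Mask[I] = int(Ctrl[I] & (Ctrl.size() - 1)); // Index &= Size - 1
  return Mask;
}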
1996
1997std::optional<Instruction *>
1998X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
1999 auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
2000 unsigned DemandedWidth) {
2001 APInt UndefElts(Width, 0);
2002 APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
2003 return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
2004 };
2005
2006 Intrinsic::ID IID = II.getIntrinsicID();
2007 switch (IID) {
2008 case Intrinsic::x86_bmi_bextr_32:
2009 case Intrinsic::x86_bmi_bextr_64:
2010 case Intrinsic::x86_tbm_bextri_u32:
2011 case Intrinsic::x86_tbm_bextri_u64:
2012 // If the RHS is a constant we can try some simplifications.
2013 if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2014 uint64_t Shift = C->getZExtValue();
2015 uint64_t Length = (Shift >> 8) & 0xff;
2016 Shift &= 0xff;
2017 unsigned BitWidth = II.getType()->getIntegerBitWidth();
2018 // If the length is 0 or the shift is out of range, replace with zero.
2019 if (Length == 0 || Shift >= BitWidth) {
2020 return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2021 }
2022 // If the LHS is also a constant, we can completely constant fold this.
2023 if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2024 uint64_t Result = InC->getZExtValue() >> Shift;
2025 if (Length > BitWidth)
2026 Length = BitWidth;
2027 Result &= maskTrailingOnes<uint64_t>(Length);
2028 return IC.replaceInstUsesWith(II,
2029 ConstantInt::get(II.getType(), Result));
2030 }
2031 // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2032 // are only masking bits that a shift already cleared?
2033 }
2034 break;
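// Illustrative sketch (plain C++, hypothetical helper name) of the BEXTR
// constant fold above: bits [Shift, Shift+Length) of the source move down to
// bit 0. For example, bextrFold(0x12345678, /*Control=*/0x1008, 32) == 0x3456.
#include <cstdint>
static uint64_t bextrFold(uint64_t Src, uint64_t Control, unsigned BitWidth) {
  uint64_t Shift = Control & 0xff;
  uint64_t Length = (Control >> 8) & 0xff;
  if (Length == 0 || Shift >= BitWidth)
    return 0;                                  // the "replace with zero" case
  if (Length > BitWidth)
    Length = BitWidth;
  uint64_t Mask = Length >= 64 ? ~0ULL : ((1ULL << Length) - 1);
  return (Src >> Shift) & Mask;                // maskTrailingOnes equivalent
}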
2035
2036 case Intrinsic::x86_bmi_bzhi_32:
2037 case Intrinsic::x86_bmi_bzhi_64:
2038 // If the RHS is a constant we can try some simplifications.
2039 if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2040 uint64_t Index = C->getZExtValue() & 0xff;
2041 unsigned BitWidth = II.getType()->getIntegerBitWidth();
2042 if (Index >= BitWidth) {
2043 return IC.replaceInstUsesWith(II, II.getArgOperand(0));
2044 }
2045 if (Index == 0) {
2046 return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2047 }
2048 // If the LHS is also a constant, we can completely constant fold this.
2049 if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2050 uint64_t Result = InC->getZExtValue();
2051 Result &= maskTrailingOnes<uint64_t>(Index);
2052 return IC.replaceInstUsesWith(II,
2053 ConstantInt::get(II.getType(), Result));
2054 }
2055 // TODO should we convert this to an AND if the RHS is constant?
2056 }
2057 break;
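// Illustrative sketch (plain C++, hypothetical helper name) of the BZHI
// constant fold above: keep the low Index bits and clear the rest, e.g.
// bzhiFold(0xdeadbeef, /*Control=*/8, 32) == 0xef.
#include <cstdint>
static uint64_t bzhiFold(uint64_t Src, uint64_t Control, unsigned BitWidth) {
  uint64_t Index = Control & 0xff;
  if (Index >= BitWidth)
    return Src;                                // nothing is zeroed
  if (Index == 0)
    return 0;                                  // everything is zeroed
  return Src & ((1ULL << Index) - 1);          // maskTrailingOnes equivalent
}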
2058 case Intrinsic::x86_bmi_pext_32:
2059 case Intrinsic::x86_bmi_pext_64:
2060 if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2061 if (MaskC->isNullValue()) {
2062 return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2063 }
2064 if (MaskC->isAllOnesValue()) {
2065 return IC.replaceInstUsesWith(II, II.getArgOperand(0));
2066 }
2067
2068 unsigned MaskIdx, MaskLen;
2069 if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
2070 // Any single contiguous sequence of 1s anywhere in the mask simply
2071 // describes a subset of the input bits shifted to the appropriate
2072 // position. Replace with the straightforward IR.
2073 Value *Input = II.getArgOperand(0);
2074 Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
2075 Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
2076 Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt);
2077 return IC.replaceInstUsesWith(II, Shifted);
2078 }
2079
2080 if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2081 uint64_t Src = SrcC->getZExtValue();
2082 uint64_t Mask = MaskC->getZExtValue();
2083 uint64_t Result = 0;
2084 uint64_t BitToSet = 1;
2085
2086 while (Mask) {
2087 // Isolate lowest set bit.
2088 uint64_t BitToTest = Mask & -Mask;
2089 if (BitToTest & Src)
2090 Result |= BitToSet;
2091
2092 BitToSet <<= 1;
2093 // Clear lowest set bit.
2094 Mask &= Mask - 1;
2095 }
2096
2097 return IC.replaceInstUsesWith(II,
2098 ConstantInt::get(II.getType(), Result));
2099 }
2100 }
2101 break;
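// Illustrative sketch (plain C++, hypothetical helper name) of the PEXT
// constant fold above: the source bits selected by Mask are gathered into the
// low bits of the result, e.g. pextFold(0x12345678, 0x0000ff00) == 0x56.
#include <cstdint>
static uint64_t pextFold(uint64_t Src, uint64_t Mask) {
  uint64_t Result = 0, BitToSet = 1;
  while (Mask) {
    uint64_t BitToTest = Mask & -Mask;         // isolate lowest set bit
    if (Src & BitToTest)
      Result |= BitToSet;
    BitToSet <<= 1;
    Mask &= Mask - 1;                          // clear lowest set bit
  }
  return Result;
}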
2102 case Intrinsic::x86_bmi_pdep_32:
2103 case Intrinsic::x86_bmi_pdep_64:
2104 if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
2105 if (MaskC->isNullValue()) {
2106 return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
2107 }
2108 if (MaskC->isAllOnesValue()) {
2109 return IC.replaceInstUsesWith(II, II.getArgOperand(0));
2110 }
2111
2112 unsigned MaskIdx, MaskLen;
2113 if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
2114 // Any single contiguous sequence of 1s anywhere in the mask simply
2115 // describes a subset of the input bits shifted to the appropriate
2116 // position. Replace with the straightforward IR.
2117 Value *Input = II.getArgOperand(0);
2118 Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
2119 Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt);
2120 Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1));
2121 return IC.replaceInstUsesWith(II, Masked);
2122 }
2123
2124 if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
2125 uint64_t Src = SrcC->getZExtValue();
2126 uint64_t Mask = MaskC->getZExtValue();
2127 uint64_t Result = 0;
2128 uint64_t BitToTest = 1;
2129
2130 while (Mask) {
2131 // Isolate lowest set bit.
2132 uint64_t BitToSet = Mask & -Mask;
2133 if (BitToTest & Src)
2134 Result |= BitToSet;
2135
2136 BitToTest <<= 1;
2137 // Clear lowest set bit.
2138 Mask &= Mask - 1;
2139 }
2140
2141 return IC.replaceInstUsesWith(II,
2142 ConstantInt::get(II.getType(), Result));
2143 }
2144 }
2145 break;
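// Illustrative sketch (plain C++, hypothetical helper name) of the PDEP
// constant fold above: the low bits of the source are scattered to the bit
// positions selected by Mask, e.g. pdepFold(0x56, 0x0000ff00) == 0x5600.
#include <cstdint>
static uint64_t pdepFold(uint64_t Src, uint64_t Mask) {
  uint64_t Result = 0, BitToTest = 1;
  while (Mask) {
    uint64_t BitToSet = Mask & -Mask;          // isolate lowest set bit
    if (Src & BitToTest)
      Result |= BitToSet;
    BitToTest <<= 1;
    Mask &= Mask - 1;                          // clear lowest set bit
  }
  return Result;
}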
2146
2147 case Intrinsic::x86_sse_cvtss2si:
2148 case Intrinsic::x86_sse_cvtss2si64:
2149 case Intrinsic::x86_sse_cvttss2si:
2150 case Intrinsic::x86_sse_cvttss2si64:
2151 case Intrinsic::x86_sse2_cvtsd2si:
2152 case Intrinsic::x86_sse2_cvtsd2si64:
2153 case Intrinsic::x86_sse2_cvttsd2si:
2154 case Intrinsic::x86_sse2_cvttsd2si64:
2155 case Intrinsic::x86_avx512_vcvtss2si32:
2156 case Intrinsic::x86_avx512_vcvtss2si64:
2157 case Intrinsic::x86_avx512_vcvtss2usi32:
2158 case Intrinsic::x86_avx512_vcvtss2usi64:
2159 case Intrinsic::x86_avx512_vcvtsd2si32:
2160 case Intrinsic::x86_avx512_vcvtsd2si64:
2161 case Intrinsic::x86_avx512_vcvtsd2usi32:
2162 case Intrinsic::x86_avx512_vcvtsd2usi64:
2163 case Intrinsic::x86_avx512_cvttss2si:
2164 case Intrinsic::x86_avx512_cvttss2si64:
2165 case Intrinsic::x86_avx512_cvttss2usi:
2166 case Intrinsic::x86_avx512_cvttss2usi64:
2167 case Intrinsic::x86_avx512_cvttsd2si:
2168 case Intrinsic::x86_avx512_cvttsd2si64:
2169 case Intrinsic::x86_avx512_cvttsd2usi:
2170 case Intrinsic::x86_avx512_cvttsd2usi64: {
2171 // These intrinsics only demand the 0th element of their input vectors. If
2172 // we can simplify the input based on that, do so now.
2173 Value *Arg = II.getArgOperand(0);
2174 unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
2175 if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2176 return IC.replaceOperand(II, 0, V);
2177 }
2178 break;
2179 }
2180
2181 case Intrinsic::x86_mmx_pmovmskb:
2182 case Intrinsic::x86_sse_movmsk_ps:
2183 case Intrinsic::x86_sse2_movmsk_pd:
2184 case Intrinsic::x86_sse2_pmovmskb_128:
2185 case Intrinsic::x86_avx_movmsk_pd_256:
2186 case Intrinsic::x86_avx_movmsk_ps_256:
2187 case Intrinsic::x86_avx2_pmovmskb:
2188 if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
2189 return IC.replaceInstUsesWith(II, V);
2190 }
2191 break;
2192
2193 case Intrinsic::x86_sse_comieq_ss:
2194 case Intrinsic::x86_sse_comige_ss:
2195 case Intrinsic::x86_sse_comigt_ss:
2196 case Intrinsic::x86_sse_comile_ss:
2197 case Intrinsic::x86_sse_comilt_ss:
2198 case Intrinsic::x86_sse_comineq_ss:
2199 case Intrinsic::x86_sse_ucomieq_ss:
2200 case Intrinsic::x86_sse_ucomige_ss:
2201 case Intrinsic::x86_sse_ucomigt_ss:
2202 case Intrinsic::x86_sse_ucomile_ss:
2203 case Intrinsic::x86_sse_ucomilt_ss:
2204 case Intrinsic::x86_sse_ucomineq_ss:
2205 case Intrinsic::x86_sse2_comieq_sd:
2206 case Intrinsic::x86_sse2_comige_sd:
2207 case Intrinsic::x86_sse2_comigt_sd:
2208 case Intrinsic::x86_sse2_comile_sd:
2209 case Intrinsic::x86_sse2_comilt_sd:
2210 case Intrinsic::x86_sse2_comineq_sd:
2211 case Intrinsic::x86_sse2_ucomieq_sd:
2212 case Intrinsic::x86_sse2_ucomige_sd:
2213 case Intrinsic::x86_sse2_ucomigt_sd:
2214 case Intrinsic::x86_sse2_ucomile_sd:
2215 case Intrinsic::x86_sse2_ucomilt_sd:
2216 case Intrinsic::x86_sse2_ucomineq_sd:
2217 case Intrinsic::x86_avx512_vcomi_ss:
2218 case Intrinsic::x86_avx512_vcomi_sd:
2219 case Intrinsic::x86_avx512_mask_cmp_ss:
2220 case Intrinsic::x86_avx512_mask_cmp_sd: {
2221 // These intrinsics only demand the 0th element of their input vectors. If
2222 // we can simplify the input based on that, do so now.
2223 bool MadeChange = false;
2224 Value *Arg0 = II.getArgOperand(0);
2225 Value *Arg1 = II.getArgOperand(1);
2226 unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
2227 if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2228 IC.replaceOperand(II, 0, V);
2229 MadeChange = true;
2230 }
2231 if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2232 IC.replaceOperand(II, 1, V);
2233 MadeChange = true;
2234 }
2235 if (MadeChange) {
2236 return &II;
2237 }
2238 break;
2239 }
2240
2241 case Intrinsic::x86_avx512_add_ps_512:
2242 case Intrinsic::x86_avx512_div_ps_512:
2243 case Intrinsic::x86_avx512_mul_ps_512:
2244 case Intrinsic::x86_avx512_sub_ps_512:
2245 case Intrinsic::x86_avx512_add_pd_512:
2246 case Intrinsic::x86_avx512_div_pd_512:
2247 case Intrinsic::x86_avx512_mul_pd_512:
2248 case Intrinsic::x86_avx512_sub_pd_512:
2249 // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2250 // IR operations.
2251 if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
2252 if (R->getValue() == 4) {
2253 Value *Arg0 = II.getArgOperand(0);
2254 Value *Arg1 = II.getArgOperand(1);
2255
2256 Value *V;
2257 switch (IID) {
2258 default:
2259 llvm_unreachable("Case stmts out of sync!");
2260 case Intrinsic::x86_avx512_add_ps_512:
2261 case Intrinsic::x86_avx512_add_pd_512:
2262 V = IC.Builder.CreateFAdd(Arg0, Arg1);
2263 break;
2264 case Intrinsic::x86_avx512_sub_ps_512:
2265 case Intrinsic::x86_avx512_sub_pd_512:
2266 V = IC.Builder.CreateFSub(Arg0, Arg1);
2267 break;
2268 case Intrinsic::x86_avx512_mul_ps_512:
2269 case Intrinsic::x86_avx512_mul_pd_512:
2270 V = IC.Builder.CreateFMul(Arg0, Arg1);
2271 break;
2272 case Intrinsic::x86_avx512_div_ps_512:
2273 case Intrinsic::x86_avx512_div_pd_512:
2274 V = IC.Builder.CreateFDiv(Arg0, Arg1);
2275 break;
2276 }
2277
2278 return IC.replaceInstUsesWith(II, V);
2279 }
2280 }
2281 break;
2282
2283 case Intrinsic::x86_avx512_mask_add_ss_round:
2284 case Intrinsic::x86_avx512_mask_div_ss_round:
2285 case Intrinsic::x86_avx512_mask_mul_ss_round:
2286 case Intrinsic::x86_avx512_mask_sub_ss_round:
2287 case Intrinsic::x86_avx512_mask_add_sd_round:
2288 case Intrinsic::x86_avx512_mask_div_sd_round:
2289 case Intrinsic::x86_avx512_mask_mul_sd_round:
2290 case Intrinsic::x86_avx512_mask_sub_sd_round:
2291 // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2292 // IR operations.
2293 if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
2294 if (R->getValue() == 4) {
2295 // Extract the element as scalars.
2296 Value *Arg0 = II.getArgOperand(0);
2297 Value *Arg1 = II.getArgOperand(1);
2298 Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
2299 Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);
2300
2301 Value *V;
2302 switch (IID) {
2303 default:
2304 llvm_unreachable("Case stmts out of sync!");
2305 case Intrinsic::x86_avx512_mask_add_ss_round:
2306 case Intrinsic::x86_avx512_mask_add_sd_round:
2307 V = IC.Builder.CreateFAdd(LHS, RHS);
2308 break;
2309 case Intrinsic::x86_avx512_mask_sub_ss_round:
2310 case Intrinsic::x86_avx512_mask_sub_sd_round:
2311 V = IC.Builder.CreateFSub(LHS, RHS);
2312 break;
2313 case Intrinsic::x86_avx512_mask_mul_ss_round:
2314 case Intrinsic::x86_avx512_mask_mul_sd_round:
2315 V = IC.Builder.CreateFMul(LHS, RHS);
2316 break;
2317 case Intrinsic::x86_avx512_mask_div_ss_round:
2318 case Intrinsic::x86_avx512_mask_div_sd_round:
2319 V = IC.Builder.CreateFDiv(LHS, RHS);
2320 break;
2321 }
2322
2323 // Handle the masking aspect of the intrinsic.
2324 Value *Mask = II.getArgOperand(3);
2325 auto *C = dyn_cast<ConstantInt>(Mask);
2326 // We don't need a select if we know the mask bit is a 1.
2327 if (!C || !C->getValue()[0]) {
2328 // Cast the mask to an i1 vector and then extract the lowest element.
2329 auto *MaskTy = FixedVectorType::get(
2330 IC.Builder.getInt1Ty(),
2331 cast<IntegerType>(Mask->getType())->getBitWidth());
2332 Mask = IC.Builder.CreateBitCast(Mask, MaskTy);
2333 Mask = IC.Builder.CreateExtractElement(Mask, (uint64_t)0);
2334 // Extract the lowest element from the passthru operand.
2335 Value *Passthru =
2336 IC.Builder.CreateExtractElement(II.getArgOperand(2), (uint64_t)0);
2337 V = IC.Builder.CreateSelect(Mask, V, Passthru);
2338 }
2339
2340 // Insert the result back into the original argument 0.
2341 V = IC.Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2342
2343 return IC.replaceInstUsesWith(II, V);
2344 }
2345 }
2346 break;
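// A standalone sketch for illustration only (plain C++, not LLVM code) of the
// masked scalar semantics being rebuilt above for the ss variants with
// rounding mode CUR_DIRECTION: mask bit 0 selects between the computed value
// and the passthru element, and the upper lanes always come from operand 0.
#include <array>
#include <cstdint>
static std::array<float, 4> maskAddSsRound(std::array<float, 4> A,
                                           std::array<float, 4> B,
                                           std::array<float, 4> Passthru,
                                           uint8_t Mask) {
  std::array<float, 4> Result = A;             // upper lanes pass through A
  Result[0] = (Mask & 1) ? (A[0] + B[0]) : Passthru[0];
  return Result;
}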
2347
2348 // Constant fold ashr( <A x Bi>, Ci ).
2349 // Constant fold lshr( <A x Bi>, Ci ).
2350 // Constant fold shl( <A x Bi>, Ci ).
2351 case Intrinsic::x86_sse2_psrai_d:
2352 case Intrinsic::x86_sse2_psrai_w:
2353 case Intrinsic::x86_avx2_psrai_d:
2354 case Intrinsic::x86_avx2_psrai_w:
2355 case Intrinsic::x86_avx512_psrai_q_128:
2356 case Intrinsic::x86_avx512_psrai_q_256:
2357 case Intrinsic::x86_avx512_psrai_d_512:
2358 case Intrinsic::x86_avx512_psrai_q_512:
2359 case Intrinsic::x86_avx512_psrai_w_512:
2360 case Intrinsic::x86_sse2_psrli_d:
2361 case Intrinsic::x86_sse2_psrli_q:
2362 case Intrinsic::x86_sse2_psrli_w:
2363 case Intrinsic::x86_avx2_psrli_d:
2364 case Intrinsic::x86_avx2_psrli_q:
2365 case Intrinsic::x86_avx2_psrli_w:
2366 case Intrinsic::x86_avx512_psrli_d_512:
2367 case Intrinsic::x86_avx512_psrli_q_512:
2368 case Intrinsic::x86_avx512_psrli_w_512:
2369 case Intrinsic::x86_sse2_pslli_d:
2370 case Intrinsic::x86_sse2_pslli_q:
2371 case Intrinsic::x86_sse2_pslli_w:
2372 case Intrinsic::x86_avx2_pslli_d:
2373 case Intrinsic::x86_avx2_pslli_q:
2374 case Intrinsic::x86_avx2_pslli_w:
2375 case Intrinsic::x86_avx512_pslli_d_512:
2376 case Intrinsic::x86_avx512_pslli_q_512:
2377 case Intrinsic::x86_avx512_pslli_w_512:
2378 if (Value *V = simplifyX86immShift(II, IC.Builder)) {
2379 return IC.replaceInstUsesWith(II, V);
2380 }
2381 break;
2382
2383 case Intrinsic::x86_sse2_psra_d:
2384 case Intrinsic::x86_sse2_psra_w:
2385 case Intrinsic::x86_avx2_psra_d:
2386 case Intrinsic::x86_avx2_psra_w:
2387 case Intrinsic::x86_avx512_psra_q_128:
2388 case Intrinsic::x86_avx512_psra_q_256:
2389 case Intrinsic::x86_avx512_psra_d_512:
2390 case Intrinsic::x86_avx512_psra_q_512:
2391 case Intrinsic::x86_avx512_psra_w_512:
2392 case Intrinsic::x86_sse2_psrl_d:
2393 case Intrinsic::x86_sse2_psrl_q:
2394 case Intrinsic::x86_sse2_psrl_w:
2395 case Intrinsic::x86_avx2_psrl_d:
2396 case Intrinsic::x86_avx2_psrl_q:
2397 case Intrinsic::x86_avx2_psrl_w:
2398 case Intrinsic::x86_avx512_psrl_d_512:
2399 case Intrinsic::x86_avx512_psrl_q_512:
2400 case Intrinsic::x86_avx512_psrl_w_512:
2401 case Intrinsic::x86_sse2_psll_d:
2402 case Intrinsic::x86_sse2_psll_q:
2403 case Intrinsic::x86_sse2_psll_w:
2404 case Intrinsic::x86_avx2_psll_d:
2405 case Intrinsic::x86_avx2_psll_q:
2406 case Intrinsic::x86_avx2_psll_w:
2407 case Intrinsic::x86_avx512_psll_d_512:
2408 case Intrinsic::x86_avx512_psll_q_512:
2409 case Intrinsic::x86_avx512_psll_w_512: {
2410 if (Value *V = simplifyX86immShift(II, IC.Builder)) {
2411 return IC.replaceInstUsesWith(II, V);
2412 }
2413
2414 // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2415 // operand to compute the shift amount.
2416 Value *Arg1 = II.getArgOperand(1);
2417 assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2418 "Unexpected packed shift size");
2419 unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
2420
2421 if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2422 return IC.replaceOperand(II, 1, V);
2423 }
2424 break;
2425 }
2426
2427 case Intrinsic::x86_avx2_psllv_d:
2428 case Intrinsic::x86_avx2_psllv_d_256:
2429 case Intrinsic::x86_avx2_psllv_q:
2430 case Intrinsic::x86_avx2_psllv_q_256:
2431 case Intrinsic::x86_avx512_psllv_d_512:
2432 case Intrinsic::x86_avx512_psllv_q_512:
2433 case Intrinsic::x86_avx512_psllv_w_128:
2434 case Intrinsic::x86_avx512_psllv_w_256:
2435 case Intrinsic::x86_avx512_psllv_w_512:
2436 case Intrinsic::x86_avx2_psrav_d:
2437 case Intrinsic::x86_avx2_psrav_d_256:
2438 case Intrinsic::x86_avx512_psrav_q_128:
2439 case Intrinsic::x86_avx512_psrav_q_256:
2440 case Intrinsic::x86_avx512_psrav_d_512:
2441 case Intrinsic::x86_avx512_psrav_q_512:
2442 case Intrinsic::x86_avx512_psrav_w_128:
2443 case Intrinsic::x86_avx512_psrav_w_256:
2444 case Intrinsic::x86_avx512_psrav_w_512:
2445 case Intrinsic::x86_avx2_psrlv_d:
2446 case Intrinsic::x86_avx2_psrlv_d_256:
2447 case Intrinsic::x86_avx2_psrlv_q:
2448 case Intrinsic::x86_avx2_psrlv_q_256:
2449 case Intrinsic::x86_avx512_psrlv_d_512:
2450 case Intrinsic::x86_avx512_psrlv_q_512:
2451 case Intrinsic::x86_avx512_psrlv_w_128:
2452 case Intrinsic::x86_avx512_psrlv_w_256:
2453 case Intrinsic::x86_avx512_psrlv_w_512:
2454 if (Value *V = simplifyX86varShift(II, IC.Builder)) {
2455 return IC.replaceInstUsesWith(II, V);
2456 }
2457 break;
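// A standalone sketch for illustration only (plain C++) of the per-element
// variable shift semantics that simplifyX86varShift folds when the shift
// amounts are constant: logical shifts by an amount >= the element width
// produce zero (the arithmetic variants instead fill with the sign bit).
#include <array>
#include <cstdint>
static std::array<uint32_t, 4> psllvd(std::array<uint32_t, 4> V,
                                      std::array<uint32_t, 4> Amt) {
  std::array<uint32_t, 4> Result{};
  for (unsigned I = 0; I < 4; ++I)
    Result[I] = (Amt[I] >= 32) ? 0 : (V[I] << Amt[I]);
  return Result;
}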
2458
2459 case Intrinsic::x86_sse2_packssdw_128:
2460 case Intrinsic::x86_sse2_packsswb_128:
2461 case Intrinsic::x86_avx2_packssdw:
2462 case Intrinsic::x86_avx2_packsswb:
2463 case Intrinsic::x86_avx512_packssdw_512:
2464 case Intrinsic::x86_avx512_packsswb_512:
2465 if (Value *V = simplifyX86pack(II, IC.Builder, true)) {
2466 return IC.replaceInstUsesWith(II, V);
2467 }
2468 break;
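// A standalone sketch for illustration only (plain C++) of the signed
// saturation that simplifyX86pack constant folds for the packss cases above:
// each 32-bit input is clamped to the int16_t range before truncation.
#include <algorithm>
#include <cstdint>
static int16_t packssdwElt(int32_t X) {
  return static_cast<int16_t>(std::clamp<int32_t>(X, -32768, 32767));
}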
2469
2470 case Intrinsic::x86_sse2_packuswb_128:
2471 case Intrinsic::x86_sse41_packusdw:
2472 case Intrinsic::x86_avx2_packusdw:
2473 case Intrinsic::x86_avx2_packuswb:
2474 case Intrinsic::x86_avx512_packusdw_512:
2475 case Intrinsic::x86_avx512_packuswb_512:
2476 if (Value *V = simplifyX86pack(II, IC.Builder, false)) {
2477 return IC.replaceInstUsesWith(II, V);
2478 }
2479 break;
2480
2481 case Intrinsic::x86_pclmulqdq:
2482 case Intrinsic::x86_pclmulqdq_256:
2483 case Intrinsic::x86_pclmulqdq_512: {
2484 if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
2485 unsigned Imm = C->getZExtValue();
2486
2487 bool MadeChange = false;
2488 Value *Arg0 = II.getArgOperand(0);
2489 Value *Arg1 = II.getArgOperand(1);
2490 unsigned VWidth =
2491 cast<FixedVectorType>(Arg0->getType())->getNumElements();
2492
2493 APInt UndefElts1(VWidth, 0);
2494 APInt DemandedElts1 =
2495 APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
2496 if (Value *V =
2497 IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) {
2498 IC.replaceOperand(II, 0, V);
2499 MadeChange = true;
2500 }
2501
2502 APInt UndefElts2(VWidth, 0);
2503 APInt DemandedElts2 =
2504 APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
2505 if (Value *V =
2506 IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) {
2507 IC.replaceOperand(II, 1, V);
2508 MadeChange = true;
2509 }
2510
2511 // If all demanded elements of either input are undef, the result is zero.
2512 if (DemandedElts1.isSubsetOf(UndefElts1) ||
2513 DemandedElts2.isSubsetOf(UndefElts2)) {
2514 return IC.replaceInstUsesWith(II,
2515 ConstantAggregateZero::get(II.getType()));
2516 }
2517
2518 if (MadeChange) {
2519 return &II;
2520 }
2521 }
2522 break;
2523 }
2524
2525 case Intrinsic::x86_sse41_insertps:
2526 if (Value *V = simplifyX86insertps(II, IC.Builder)) {
2527 return IC.replaceInstUsesWith(II, V);
2528 }
2529 break;
2530
2531 case Intrinsic::x86_sse4a_extrq: {
2532 Value *Op0 = II.getArgOperand(0);
2533 Value *Op1 = II.getArgOperand(1);
2534 unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
2535 unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
2536 assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2537 Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2538 VWidth1 == 16 && "Unexpected operand sizes");
2539
2540 // See if we're dealing with constant values.
2541 auto *C1 = dyn_cast<Constant>(Op1);
2542 auto *CILength =
2543 C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2544 : nullptr;
2545 auto *CIIndex =
2546 C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2547 : nullptr;
2548
2549 // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2550 if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
2551 return IC.replaceInstUsesWith(II, V);
2552 }
2553
2554 // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2555 // operand and the lowest 16-bits of the second.
2556 bool MadeChange = false;
2557 if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2558 IC.replaceOperand(II, 0, V);
2559 MadeChange = true;
2560 }
2561 if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2562 IC.replaceOperand(II, 1, V);
2563 MadeChange = true;
2564 }
2565 if (MadeChange) {
2566 return &II;
2567 }
2568 break;
2569 }
2570
2571 case Intrinsic::x86_sse4a_extrqi: {
2572 // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2573 // bits of the lower 64-bits. The upper 64-bits are undefined.
2574 Value *Op0 = II.getArgOperand(0);
2575 unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
2576 assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2577 "Unexpected operand size");
2578
2579 // See if we're dealing with constant values.
2580 auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));
2581 auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(2));
2582
2583 // Attempt to simplify to a constant or shuffle vector.
2584 if (Value *V = simplifyX86extrq(II, Op0, CILength, CIIndex, IC.Builder)) {
2585 return IC.replaceInstUsesWith(II, V);
2586 }
2587
2588 // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2589 // operand.
2590 if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2591 return IC.replaceOperand(II, 0, V);
2592 }
2593 break;
2594 }
2595
2596 case Intrinsic::x86_sse4a_insertq: {
2597 Value *Op0 = II.getArgOperand(0);
2598 Value *Op1 = II.getArgOperand(1);
2599 unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
2600 assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2601 Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2602 cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
2603 "Unexpected operand size");
2604
2605 // See if we're dealing with constant values.
2606 auto *C1 = dyn_cast<Constant>(Op1);
2607 auto *CI11 =
2608 C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2609 : nullptr;
2610
2611 // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2612 if (CI11) {
2613 const APInt &V11 = CI11->getValue();
2614 APInt Len = V11.zextOrTrunc(6);
2615 APInt Idx = V11.lshr(8).zextOrTrunc(6);
2616 if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
2617 return IC.replaceInstUsesWith(II, V);
2618 }
2619 }
2620
2621 // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2622 // operand.
2623 if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2624 return IC.replaceOperand(II, 0, V);
2625 }
2626 break;
2627 }
2628
2629 case Intrinsic::x86_sse4a_insertqi: {
2630 // INSERTQI: Extract lowest Length bits from lower half of second source and
2631 // insert over first source starting at Index bit. The upper 64-bits are
2632 // undefined.
2633 Value *Op0 = II.getArgOperand(0);
2634 Value *Op1 = II.getArgOperand(1);
2635 unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
2636 unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
2637 assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2638 Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2639 VWidth1 == 2 && "Unexpected operand sizes");
2640
2641 // See if we're dealing with constant values.
2642 auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
2643 auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));
2644
2645 // Attempt to simplify to a constant or shuffle vector.
2646 if (CILength && CIIndex) {
2647 APInt Len = CILength->getValue().zextOrTrunc(6);
2648 APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2649 if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
2650 return IC.replaceInstUsesWith(II, V);
2651 }
2652 }
2653
2654 // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2655 // operands.
2656 bool MadeChange = false;
2657 if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2658 IC.replaceOperand(II, 0, V);
2659 MadeChange = true;
2660 }
2661 if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2662 IC.replaceOperand(II, 1, V);
2663 MadeChange = true;
2664 }
2665 if (MadeChange) {
2666 return &II;
2667 }
2668 break;
2669 }
2670
2671 case Intrinsic::x86_sse41_pblendvb:
2672 case Intrinsic::x86_sse41_blendvps:
2673 case Intrinsic::x86_sse41_blendvpd:
2674 case Intrinsic::x86_avx_blendv_ps_256:
2675 case Intrinsic::x86_avx_blendv_pd_256:
2676 case Intrinsic::x86_avx2_pblendvb: {
2677 // fold (blend A, A, Mask) -> A
2678 Value *Op0 = II.getArgOperand(0);
2679 Value *Op1 = II.getArgOperand(1);
2680 Value *Mask = II.getArgOperand(2);
2681 if (Op0 == Op1) {
2682 return IC.replaceInstUsesWith(II, Op0);
2683 }
2684
2685 // Zero Mask - select 1st argument.
2686 if (isa<ConstantAggregateZero>(Mask)) {
2687 return IC.replaceInstUsesWith(II, Op0);
2688 }
2689
2690 // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
2691 if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2692 Constant *NewSelector =
2693 getNegativeIsTrueBoolVec(ConstantMask, IC.getDataLayout());
2694 return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2695 }
2696
2697 // Convert to a vector select if we can bypass casts and find a boolean
2698 // vector condition value.
2699 Value *BoolVec;
2700 Mask = InstCombiner::peekThroughBitcast(Mask);
2701 if (match(Mask, PatternMatch::m_SExt(PatternMatch::m_Value(BoolVec))) &&
2702 BoolVec->getType()->isVectorTy() &&
2703 BoolVec->getType()->getScalarSizeInBits() == 1) {
2704 assert(Mask->getType()->getPrimitiveSizeInBits() ==
2705 II.getType()->getPrimitiveSizeInBits() &&
2706 "Not expecting mask and operands with different sizes");
2707
2708 unsigned NumMaskElts =
2709 cast<FixedVectorType>(Mask->getType())->getNumElements();
2710 unsigned NumOperandElts =
2711 cast<FixedVectorType>(II.getType())->getNumElements();
2712 if (NumMaskElts == NumOperandElts) {
2713 return SelectInst::Create(BoolVec, Op1, Op0);
2714 }
2715
2716 // If the mask has fewer elements than the operands, each mask bit maps to
2717 // multiple elements of the operands. Bitcast back and forth.
2718 if (NumMaskElts < NumOperandElts) {
2719 Value *CastOp0 = IC.Builder.CreateBitCast(Op0, Mask->getType());
2720 Value *CastOp1 = IC.Builder.CreateBitCast(Op1, Mask->getType());
2721 Value *Sel = IC.Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
2722 return new BitCastInst(Sel, II.getType());
2723 }
2724 }
2725
2726 break;
2727 }
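// Illustrative sketch (plain C++, not from this file) of the BLENDV selection
// rule the folds above rely on: an element comes from the second operand when
// the corresponding mask element has its sign bit set, and from the first
// operand otherwise.
#include <array>
#include <cstdint>
static std::array<int8_t, 16> pblendvb(std::array<int8_t, 16> Op0,
                                       std::array<int8_t, 16> Op1,
                                       std::array<int8_t, 16> Mask) {
  std::array<int8_t, 16> Result;
  for (unsigned I = 0; I < 16; ++I)
    Result[I] = (Mask[I] < 0) ? Op1[I] : Op0[I]; // sign bit set -> take Op1
  return Result;
}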
2728
2729 case Intrinsic::x86_ssse3_pshuf_b_128:
2730 case Intrinsic::x86_avx2_pshuf_b:
2731 case Intrinsic::x86_avx512_pshuf_b_512:
2732 if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
2733 return IC.replaceInstUsesWith(II, V);
2734 }
2735 break;
2736
2737 case Intrinsic::x86_avx_vpermilvar_ps:
2738 case Intrinsic::x86_avx_vpermilvar_ps_256:
2739 case Intrinsic::x86_avx512_vpermilvar_ps_512:
2740 case Intrinsic::x86_avx_vpermilvar_pd:
2741 case Intrinsic::x86_avx_vpermilvar_pd_256:
2742 case Intrinsic::x86_avx512_vpermilvar_pd_512:
2743 if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
2744 return IC.replaceInstUsesWith(II, V);
2745 }
2746 break;
2747
2748 case Intrinsic::x86_avx2_permd:
2749 case Intrinsic::x86_avx2_permps:
2750 case Intrinsic::x86_avx512_permvar_df_256:
2751 case Intrinsic::x86_avx512_permvar_df_512:
2752 case Intrinsic::x86_avx512_permvar_di_256:
2753 case Intrinsic::x86_avx512_permvar_di_512:
2754 case Intrinsic::x86_avx512_permvar_hi_128:
2755 case Intrinsic::x86_avx512_permvar_hi_256:
2756 case Intrinsic::x86_avx512_permvar_hi_512:
2757 case Intrinsic::x86_avx512_permvar_qi_128:
2758 case Intrinsic::x86_avx512_permvar_qi_256:
2759 case Intrinsic::x86_avx512_permvar_qi_512:
2760 case Intrinsic::x86_avx512_permvar_sf_512:
2761 case Intrinsic::x86_avx512_permvar_si_512:
2762 if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
2763 return IC.replaceInstUsesWith(II, V);
2764 }
2765 break;
2766
2767 case Intrinsic::x86_avx_maskload_ps:
2768 case Intrinsic::x86_avx_maskload_pd:
2769 case Intrinsic::x86_avx_maskload_ps_256:
2770 case Intrinsic::x86_avx_maskload_pd_256:
2771 case Intrinsic::x86_avx2_maskload_d:
2772 case Intrinsic::x86_avx2_maskload_q:
2773 case Intrinsic::x86_avx2_maskload_d_256:
2774 case Intrinsic::x86_avx2_maskload_q_256:
2775 if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
2776 return I;
2777 }
2778 break;
2779
2780 case Intrinsic::x86_sse2_maskmov_dqu:
2781 case Intrinsic::x86_avx_maskstore_ps:
2782 case Intrinsic::x86_avx_maskstore_pd:
2783 case Intrinsic::x86_avx_maskstore_ps_256:
2784 case Intrinsic::x86_avx_maskstore_pd_256:
2785 case Intrinsic::x86_avx2_maskstore_d:
2786 case Intrinsic::x86_avx2_maskstore_q:
2787 case Intrinsic::x86_avx2_maskstore_d_256:
2788 case Intrinsic::x86_avx2_maskstore_q_256:
2789 if (simplifyX86MaskedStore(II, IC)) {
2790 return nullptr;
2791 }
2792 break;
2793
2794 case Intrinsic::x86_addcarry_32:
2795 case Intrinsic::x86_addcarry_64:
2796 if (Value *V = simplifyX86addcarry(II, IC.Builder)) {
2797 return IC.replaceInstUsesWith(II, V);
2798 }
2799 break;
2800
2801 case Intrinsic::x86_avx512_pternlog_d_128:
2802 case Intrinsic::x86_avx512_pternlog_d_256:
2803 case Intrinsic::x86_avx512_pternlog_d_512:
2804 case Intrinsic::x86_avx512_pternlog_q_128:
2805 case Intrinsic::x86_avx512_pternlog_q_256:
2806 case Intrinsic::x86_avx512_pternlog_q_512:
2807 if (Value *V = simplifyTernarylogic(II, IC.Builder)) {
2808 return IC.replaceInstUsesWith(II, V);
2809 }
2810 break;
2811 default:
2812 break;
2813 }
2814 return std::nullopt;
2815}
2816
2817std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
2818 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
2819 bool &KnownBitsComputed) const {
2820 switch (II.getIntrinsicID()) {
2821 default:
2822 break;
2823 case Intrinsic::x86_mmx_pmovmskb:
2824 case Intrinsic::x86_sse_movmsk_ps:
2825 case Intrinsic::x86_sse2_movmsk_pd:
2826 case Intrinsic::x86_sse2_pmovmskb_128:
2827 case Intrinsic::x86_avx_movmsk_ps_256:
2828 case Intrinsic::x86_avx_movmsk_pd_256:
2829 case Intrinsic::x86_avx2_pmovmskb: {
2830 // MOVMSK copies the vector elements' sign bits to the low bits
2831 // and zeros the high bits.
2832 unsigned ArgWidth;
2833 if (II.getIntrinsicID() == Intrinsic::x86_mmx_pmovmskb) {
2834 ArgWidth = 8; // Arg is x86_mmx, but treated as <8 x i8>.
2835 } else {
2836 auto *ArgType = cast<FixedVectorType>(II.getArgOperand(0)->getType());
2837 ArgWidth = ArgType->getNumElements();
2838 }
2839
2840 // If we don't need any of low bits then return zero,
2841 // we know that DemandedMask is non-zero already.
2842 APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
2843 Type *VTy = II.getType();
2844 if (DemandedElts.isZero()) {
2845 return ConstantInt::getNullValue(VTy);
2846 }
2847
2848 // We know that the upper bits are set to zero.
2849 Known.Zero.setBitsFrom(ArgWidth);
2850 KnownBitsComputed = true;
2851 break;
2852 }
2853 }
2854 return std::nullopt;
2855}
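// Illustrative sketch (plain C++, not from this file) of the MOVMSK behavior
// the known-bits reasoning above models: one sign bit per input element lands
// in the corresponding low bit of the result, and every bit at or above
// ArgWidth is known zero.
#include <array>
#include <cstdint>
static uint32_t pmovmskb128(std::array<int8_t, 16> V) {
  uint32_t Result = 0;
  for (unsigned I = 0; I < 16; ++I)
    if (V[I] < 0)
      Result |= 1u << I;                       // copy element I's sign bit
  return Result;
}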
2856
2857std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
2858 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2859 APInt &UndefElts2, APInt &UndefElts3,
2860 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2861 simplifyAndSetOp) const {
2862 unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
2863 switch (II.getIntrinsicID()) {
2864 default:
2865 break;
2866 case Intrinsic::x86_xop_vfrcz_ss:
2867 case Intrinsic::x86_xop_vfrcz_sd:
2868 // The instructions for these intrinsics are specified to zero the upper
2869 // bits rather than pass them through like other scalar intrinsics, so we
2870 // shouldn't just use Arg0 if DemandedElts[0] is clear like we do for other
2871 // intrinsics. Instead we should return a zero vector.
2872 if (!DemandedElts[0]) {
2873 IC.addToWorklist(&II);
2874 return ConstantAggregateZero::get(II.getType());
2875 }
2876
2877 // Only the lower element is used.
2878 DemandedElts = 1;
2879 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
2880
2881 // Only the lower element is undefined. The high elements are zero.
2882 UndefElts = UndefElts[0];
2883 break;
2884
2885 // Unary scalar-as-vector operations that work column-wise.
2886 case Intrinsic::x86_sse_rcp_ss:
2887 case Intrinsic::x86_sse_rsqrt_ss:
2888 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
2889
2890 // If lowest element of a scalar op isn't used then use Arg0.
2891 if (!DemandedElts[0]) {
2892 IC.addToWorklist(&II);
2893 return II.getArgOperand(0);
2894 }
2895 // TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
2896 // checks).
2897 break;
2898
2899 // Binary scalar-as-vector operations that work column-wise. The high
2900 // elements come from operand 0. The low element is a function of both
2901 // operands.
2902 case Intrinsic::x86_sse_min_ss:
2903 case Intrinsic::x86_sse_max_ss:
2904 case Intrinsic::x86_sse_cmp_ss:
2905 case Intrinsic::x86_sse2_min_sd:
2906 case Intrinsic::x86_sse2_max_sd:
2907 case Intrinsic::x86_sse2_cmp_sd: {
2908 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
2909
2910 // If lowest element of a scalar op isn't used then use Arg0.
2911 if (!DemandedElts[0]) {
2912 IC.addToWorklist(&II);
2913 return II.getArgOperand(0);
2914 }
2915
2916 // Only lower element is used for operand 1.
2917 DemandedElts = 1;
2918 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
2919
2920 // Lower element is undefined if both lower elements are undefined.
2921 // Consider things like undef&0. The result is known zero, not undef.
2922 if (!UndefElts2[0])
2923 UndefElts.clearBit(0);
2924
2925 break;
2926 }
2927
2928 // Binary scalar-as-vector operations that work column-wise. The high
2929 // elements come from operand 0 and the low element comes from operand 1.
2930 case Intrinsic::x86_sse41_round_ss:
2931 case Intrinsic::x86_sse41_round_sd: {
2932 // Don't use the low element of operand 0.
2933 APInt DemandedElts2 = DemandedElts;
2934 DemandedElts2.clearBit(0);
2935 simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
2936
2937 // If lowest element of a scalar op isn't used then use Arg0.
2938 if (!DemandedElts[0]) {
2939 IC.addToWorklist(&II);
2940 return II.getArgOperand(0);
2941 }
2942
2943 // Only lower element is used for operand 1.
2944 DemandedElts = 1;
2945 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
2946
2947 // Take the high undef elements from operand 0 and take the lower element
2948 // from operand 1.
2949 UndefElts.clearBit(0);
2950 UndefElts |= UndefElts2[0];
2951 break;
2952 }
2953
2954 // Three input scalar-as-vector operations that work column-wise. The high
2955 // elements come from operand 0 and the low element is a function of all
2956 // three inputs.
2957 case Intrinsic::x86_avx512_mask_add_ss_round:
2958 case Intrinsic::x86_avx512_mask_div_ss_round:
2959 case Intrinsic::x86_avx512_mask_mul_ss_round:
2960 case Intrinsic::x86_avx512_mask_sub_ss_round:
2961 case Intrinsic::x86_avx512_mask_max_ss_round:
2962 case Intrinsic::x86_avx512_mask_min_ss_round:
2963 case Intrinsic::x86_avx512_mask_add_sd_round:
2964 case Intrinsic::x86_avx512_mask_div_sd_round:
2965 case Intrinsic::x86_avx512_mask_mul_sd_round:
2966 case Intrinsic::x86_avx512_mask_sub_sd_round:
2967 case Intrinsic::x86_avx512_mask_max_sd_round:
2968 case Intrinsic::x86_avx512_mask_min_sd_round:
2969 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
2970
2971 // If lowest element of a scalar op isn't used then use Arg0.
2972 if (!DemandedElts[0]) {
2973 IC.addToWorklist(&II);
2974 return II.getArgOperand(0);
2975 }
2976
2977 // Only lower element is used for operand 1 and 2.
2978 DemandedElts = 1;
2979 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
2980 simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
2981
2982 // Lower element is undefined if all three lower elements are undefined.
2983 // Consider things like undef&0. The result is known zero, not undef.
2984 if (!UndefElts2[0] || !UndefElts3[0])
2985 UndefElts.clearBit(0);
2986 break;
2987
2988 // TODO: Add fmaddsub support?
2989 case Intrinsic::x86_sse3_addsub_pd:
2990 case Intrinsic::x86_sse3_addsub_ps:
2991 case Intrinsic::x86_avx_addsub_pd_256:
2992 case Intrinsic::x86_avx_addsub_ps_256: {
2993 // If none of the even or none of the odd lanes are required, turn this
2994 // into a generic FP math instruction.
2995 APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
2996 APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
2997 bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
2998 bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
2999 if (IsSubOnly || IsAddOnly) {
3000 assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
3001 IRBuilderBase::InsertPointGuard Guard(IC.Builder);
3002 IC.Builder.SetInsertPoint(&II);
3003 Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
3004 return IC.Builder.CreateBinOp(
3005 IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
3006 }
3007
3008 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3009 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3010 UndefElts &= UndefElts2;
3011 break;
3012 }
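// A standalone sketch for illustration only (plain C++) of the ADDSUB lane
// pattern used above: even-indexed lanes compute A - B and odd-indexed lanes
// compute A + B, so demanding only even lanes folds to FSub and demanding
// only odd lanes folds to FAdd.
#include <array>
static std::array<double, 4> addsubPd256(std::array<double, 4> A,
                                         std::array<double, 4> B) {
  std::array<double, 4> Result;
  for (unsigned I = 0; I < 4; ++I)
    Result[I] = (I % 2 == 0) ? A[I] - B[I] : A[I] + B[I];
  return Result;
}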
3013
3014 // General per-element vector operations.
3015 case Intrinsic::x86_avx2_psllv_d:
3016 case Intrinsic::x86_avx2_psllv_d_256:
3017 case Intrinsic::x86_avx2_psllv_q:
3018 case Intrinsic::x86_avx2_psllv_q_256:
3019 case Intrinsic::x86_avx2_psrlv_d:
3020 case Intrinsic::x86_avx2_psrlv_d_256:
3021 case Intrinsic::x86_avx2_psrlv_q:
3022 case Intrinsic::x86_avx2_psrlv_q_256:
3023 case Intrinsic::x86_avx2_psrav_d:
3024 case Intrinsic::x86_avx2_psrav_d_256: {
3025 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
3026 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
3027 UndefElts &= UndefElts2;
3028 break;
3029 }
3030
3031 case Intrinsic::x86_sse2_packssdw_128:
3032 case Intrinsic::x86_sse2_packsswb_128:
3033 case Intrinsic::x86_sse2_packuswb_128:
3034 case Intrinsic::x86_sse41_packusdw:
3035 case Intrinsic::x86_avx2_packssdw:
3036 case Intrinsic::x86_avx2_packsswb:
3037 case Intrinsic::x86_avx2_packusdw:
3038 case Intrinsic::x86_avx2_packuswb:
3039 case Intrinsic::x86_avx512_packssdw_512:
3040 case Intrinsic::x86_avx512_packsswb_512:
3041 case Intrinsic::x86_avx512_packusdw_512:
3042 case Intrinsic::x86_avx512_packuswb_512: {
3043 auto *Ty0 = II.getArgOperand(0)->getType();
3044 unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
3045 assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
3046
3047 unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
3048 unsigned VWidthPerLane = VWidth / NumLanes;
3049 unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
3050
3051 // Per lane, pack the elements of the first input and then the second.
3052 // e.g.
3053 // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
3054 // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
3055 for (int OpNum = 0; OpNum != 2; ++OpNum) {
3056 APInt OpDemandedElts(InnerVWidth, 0);
3057 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3058 unsigned LaneIdx = Lane * VWidthPerLane;
3059 for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
3060 unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
3061 if (DemandedElts[Idx])
3062 OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
3063 }
3064 }
3065
3066 // Demand elements from the operand.
3067 APInt OpUndefElts(InnerVWidth, 0);
3068 simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);
3069
3070 // Pack the operand's UNDEF elements, one lane at a time.
3071 OpUndefElts = OpUndefElts.zext(VWidth);
3072 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3073 APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
3074 LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
3075 LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
3076 UndefElts |= LaneElts;
3077 }
3078 }
3079 break;
3080 }
3081
3082 // PSHUFB
3083 case Intrinsic::x86_ssse3_pshuf_b_128:
3084 case Intrinsic::x86_avx2_pshuf_b:
3085 case Intrinsic::x86_avx512_pshuf_b_512:
3086 // PERMILVAR
3087 case Intrinsic::x86_avx_vpermilvar_ps:
3088 case Intrinsic::x86_avx_vpermilvar_ps_256:
3089 case Intrinsic::x86_avx512_vpermilvar_ps_512:
3090 case Intrinsic::x86_avx_vpermilvar_pd:
3091 case Intrinsic::x86_avx_vpermilvar_pd_256:
3092 case Intrinsic::x86_avx512_vpermilvar_pd_512:
3093 // PERMV
3094 case Intrinsic::x86_avx2_permd:
3095 case Intrinsic::x86_avx2_permps: {
3096 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
3097 break;
3098 }
3099
3100 // SSE4A instructions leave the upper 64-bits of the 128-bit result
3101 // in an undefined state.
3102 case Intrinsic::x86_sse4a_extrq:
3103 case Intrinsic::x86_sse4a_extrqi:
3104 case Intrinsic::x86_sse4a_insertq:
3105 case Intrinsic::x86_sse4a_insertqi:
3106 UndefElts.setHighBits(VWidth / 2);
3107 break;
3108 }
3109 return std::nullopt;
3110}