//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
#include <deque>
using namespace llvm;
using namespace llvm::SDPatternMatch;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

// Define the virtual destructor out-of-line for build efficiency.
TargetLowering::~TargetLowering() = default;

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef,
                           Attribute::Range, Attribute::NoFPClass})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

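// Illustrative note (not part of the original source): the attribute check
// above means a call in IR such as
//
//   define i32 @caller(i32 %x) "disable-tail-calls"="true" {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
//
// is never treated as being in tail position, while a caller whose return
// carries only ignorable attributes (align, nonnull, noundef, ...) can still
// qualify, provided the call's only use is the return.
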
bool TargetLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    // for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

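// Illustrative note (not part of the original source): for a call such as
//
//   call void @f(ptr byval(%struct.S) align 8 %p)
//
// the entry for %p gets IsByVal = true and IndirectType = %struct.S, and,
// since no separate stack alignment was requested, Alignment falls back to
// the parameter alignment of 8.
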
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
                            EVT RetVT, ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions, const SDLoc &dl,
                            SDValue InChain) const {
  if (LibcallImpl == RTLIB::Unsupported)
    reportFatalInternalError("unsupported library call operation");

  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
                   ? OpsTypeOverrides[i]
                   : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
    TargetLowering::ArgListEntry Entry(NewOp, Ty);
    if (CallOptions.IsSoften)
      Entry.OrigTy =
          CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());

    Entry.IsSExt =
        shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  SDValue Callee =
      DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  Type *OrigRetTy = RetTy;
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften) {
    OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
    if (!shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften))
      signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
                    Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

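// Illustrative sketch (not part of the original source): a target without a
// native f32 remainder could expand FREM through makeLibCall roughly as
// follows; RTLIB::REM_F32 is a real libcall enum, the surrounding code is an
// assumption:
//
//   SDValue Ops[2] = {N->getOperand(0), N->getOperand(1)};
//   TargetLowering::MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> Res = makeLibCall(
//       DAG, getLibcallImpl(RTLIB::REM_F32), MVT::f32, Ops, CallOptions, dl);
//   // Res.first is the fmodf result, Res.second is the output chain.
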
bool TargetLowering::findOptimalMemOpLowering(
    LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
    const MemOp &Op, unsigned DstAS, unsigned SrcAS,
    const AttributeList &FuncAttributes, EVT *LargestVT) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::LAST_INTEGER_VALUETYPE;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

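// Illustrative note (not part of the original source): for an 11-byte memcpy
// on a 64-bit target where i64 is legal, the loop above typically emits
// { i64, i16, i8 } (8 + 2 + 1 bytes). If Op.allowOverlap() is true and
// unaligned accesses are fast, it can instead emit two i64 accesses, the
// second overlapping the first (offsets 0 and 3), using fewer operations.
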
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting it. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
  if (LC1Impl == RTLIB::Unsupported) {
    DAG.getContext()->emitError(
        "no libcall available to soften floating-point compare");
  }

  CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
    if (LC2Impl == RTLIB::Unsupported) {
      DAG.getContext()->emitError(
          "no libcall available to soften floating-point compare");
    }

    assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
           "unordered call should be simple boolean");

    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    if (getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
      NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
                           DAG.getValueType(MVT::i1));
    }

    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

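// Illustrative note (not part of the original source): softening
// (setcc f32 %a, %b, ueq) picks LC1 = UO_F32 and LC2 = OEQ_F32, which with
// libgcc naming become __unordsf2 and __eqsf2. The result is assembled as
//
//   (__unordsf2(a, b) != 0) | (__eqsf2(a, b) == 0)
//
// i.e. "either unordered, or ordered-and-equal", with the two call chains
// joined by a TokenFactor when a chain is being tracked.
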
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

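// Illustrative note (not part of the original source): with a static
// relocation model and a dso_local global @g, an access like
// getelementptr(@g, 16) can fold into a single absolute address "g+16".
// Under PIC, the same access needs a GOT load (or a base register), so the
// offset cannot be folded into the global address node.
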
//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node; leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
                                      Op->getFlags());
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

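// Illustrative note (not part of the original source): if only the low byte
// of (and X, 0xFF00FF) is demanded, the mask constant is not a subset of the
// demanded bits, so the node is rewritten as (and X, 0xFF). The XOR guard
// above keeps e.g. (xor X, -1) intact, since that is the canonical 'not'.
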
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
         Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
         "ShrinkDemandedOp only supports operands that have the same size!");

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.

      // If the operation has the 'disjoint' flag, then the
      // operands on the new node are also disjoint.
      SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
                                                     : SDNodeFlags::None);
      unsigned Opcode = Op.getOpcode();
      if (Opcode == ISD::PTRADD) {
        // It isn't a ptradd anymore if it doesn't operate on the entire
        // pointer.
        Opcode = ISD::ADD;
      }
      SDValue X = DAG.getNode(
          Opcode, dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

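// Illustrative note (not part of the original source): on a target where
// the i64->i32 truncate and the i32->i64 zero-extend are free (e.g. the low
// half on most 64-bit targets), demanding only the low 16 bits of
// (add i64 %x, %y) lets the loop above rewrite it as
//
//   (any_extend i64 (add i32 (trunc %x), (trunc %y)))
//
// starting the search at the power-of-2 width that covers the demanded bits.
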
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::ADD: {
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    if (RHSKnown.isZero())
      return Op.getOperand(0);

    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (LHSKnown.isZero())
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SRL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (std::optional<unsigned> MaxSA =
            DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = *MaxSA;
      // Must already be signbits in DemandedBits bounds, and can't demand any
      // shifted in zeroes.
      if (DemandedBits.countl_zero() >= ShAmt) {
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
          return Op0;
      }
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of extended bits, then we can
    // return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

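// Illustrative note (not part of the original source): unlike
// SimplifyDemandedBits, these helpers never rewrite the node they inspect, so
// they are safe on multi-use values. For example, if Op = (or X, Y) and every
// demanded bit of Y is known zero, the caller simply receives X and can use
// it in place of the 'or', while other users of the 'or' are left untouched.
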
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op,
                                 TargetLowering::TargetLoweringOpt &TLO,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts, unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  //   add(ext, ext)
  // or one of these as an avgceil
  //   add(add(ext, ext), 1)
  //   add(add(ext, 1), ext)
  //   add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB,
                     ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA,
                     ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  SelectionDAG &DAG = TLO.DAG;
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (NVT.getScalarSizeInBits() > VT.getScalarSizeInBits())
    return SDValue();
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  // Don't create an AVGFLOOR node with a scalar constant unless it's legal, as
  // this is likely to stop other folds (reassociation, value tracking etc.)
  if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
      (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}

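// Illustrative note (not part of the original source): with i8 values A and B
// zero-extended to i32 (so each add operand has >= 24 known zero bits),
//
//   srl (add (zext A), (zext B)), 1            --> zext (avgflooru A, B)
//   srl (add (add (zext A), (zext B)), 1), 1   --> zext (avgceilu A, B)
//
// where the AVG node is built in the narrowest legal type wide enough for
// the known bits and then re-extended to the original type.
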
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.setAllConflict();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.setAllConflict();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.setAllConflict();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1)) {
      Op->dropFlags(SDNodeFlags::Disjoint);
      return true;
    }

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
        Op0->hasOneUse() && Op1->hasOneUse()) {
      // Attempt to match all commutations - m_c_Or would've been useful!
      for (int I = 0; I != 2; ++I) {
        SDValue X = Op.getOperand(I).getOperand(0);
        SDValue C1 = Op.getOperand(I).getOperand(1);
        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
        if (Alt.getOpcode() == ISD::OR) {
          for (int J = 0; J != 2; ++J) {
            if (X == Alt.getOperand(J)) {
              SDValue Y = Alt.getOperand(1 - J);
              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
                                                               {C1, C2})) {
                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
                return TLO.CombineTo(
                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
              }
            }
          }
        }
      }
    }

    Known |= Known2;
    break;
  }
1614 case ISD::XOR: {
1615 SDValue Op0 = Op.getOperand(0);
1616 SDValue Op1 = Op.getOperand(1);
1617
1618 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1619 Depth + 1))
1620 return true;
1621 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1622 Depth + 1))
1623 return true;
1624
1625 // If all of the demanded bits are known zero on one side, return the other.
1626 // These bits cannot contribute to the result of the 'xor'.
1627 if (DemandedBits.isSubsetOf(Known.Zero))
1628 return TLO.CombineTo(Op, Op0);
1629 if (DemandedBits.isSubsetOf(Known2.Zero))
1630 return TLO.CombineTo(Op, Op1);
1631 // If the operation can be done in a smaller type, do so.
1633 return true;
1634
1635 // If all of the unknown bits are known to be zero on one side or the other
1636 // turn this into an *inclusive* or.
1637 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1638 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1639 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1640
1641 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1642 if (C) {
1643 // If one side is a constant, and all of the set bits in the constant are
1644 // also known set on the other side, turn this into an AND, as we know
1645 // the bits will be cleared.
1646 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1647 // NB: it is okay if more bits are known than are requested
1648 if (C->getAPIntValue() == Known2.One) {
1649 SDValue ANDC =
1650 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1651 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1652 }
1653
1654 // If the RHS is a constant, see if we can change it. Don't alter a -1
1655 // constant because that's a 'not' op, and that is better for combining
1656 // and codegen.
1657 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1658 // We're flipping all demanded bits. Flip the undemanded bits too.
1659 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1660 return TLO.CombineTo(Op, New);
1661 }
1662
1663 unsigned Op0Opcode = Op0.getOpcode();
1664 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1665 if (ConstantSDNode *ShiftC =
1666 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1667 // Don't crash on an oversized shift. We can not guarantee that a
1668 // bogus shift has been simplified to undef.
1669 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1670 uint64_t ShiftAmt = ShiftC->getZExtValue();
1672 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1673 : Ones.lshr(ShiftAmt);
1674 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1676 // If the xor constant is a demanded mask, do a 'not' before the
1677 // shift:
1678 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1679 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
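// Illustrative instance: with i8, xor (X << 4), 0xF0 --> (not X) << 4, since
// Ones == 0xFF << 4 == 0xF0 and the xor constant matches the shifted mask.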
1680 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1681 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1682 Op0.getOperand(1)));
1683 }
1684 }
1685 }
1686 }
1687 }
1688
1689 // If we can't turn this into a 'not', try to shrink the constant.
1690 if (!C || !C->isAllOnes())
1691 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1692 return true;
1693
1694 // Attempt to avoid multi-use ops if we don't need anything from them.
1695 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1696 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1697 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1698 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1699 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1700 if (DemandedOp0 || DemandedOp1) {
1701 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1702 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1703 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1704 return TLO.CombineTo(Op, NewOp);
1705 }
1706 }
1707
1708 Known ^= Known2;
1709 break;
1710 }
1711 case ISD::SELECT:
1712 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1713 Known, TLO, Depth + 1))
1714 return true;
1715 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1716 Known2, TLO, Depth + 1))
1717 return true;
1718
1719 // If the operands are constants, see if we can simplify them.
1720 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1721 return true;
1722
1723 // Only known if known in both the LHS and RHS.
1724 Known = Known.intersectWith(Known2);
1725 break;
1726 case ISD::VSELECT:
1727 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1728 Known, TLO, Depth + 1))
1729 return true;
1730 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1731 Known2, TLO, Depth + 1))
1732 return true;
1733
1734 // Only known if known in both the LHS and RHS.
1735 Known = Known.intersectWith(Known2);
1736 break;
1737 case ISD::SELECT_CC:
1738 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1739 Known, TLO, Depth + 1))
1740 return true;
1741 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1742 Known2, TLO, Depth + 1))
1743 return true;
1744
1745 // If the operands are constants, see if we can simplify them.
1746 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1747 return true;
1748
1749 // Only known if known in both the LHS and RHS.
1750 Known = Known.intersectWith(Known2);
1751 break;
1752 case ISD::SETCC: {
1753 SDValue Op0 = Op.getOperand(0);
1754 SDValue Op1 = Op.getOperand(1);
1755 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1756 // If we're testing X < 0, X >= 0, X <= -1 or X > -1 (X of integer type),
1757 // then the result depends only on the sign bit of X, so we only need to
1758 // demand the sign bit of the operand.
1759 if (Op1.getValueType().isInteger() &&
1760 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(Op1)) ||
1761 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1762 isAllOnesOrAllOnesSplat(Op1)))) {
1763 KnownBits KnownOp0;
1764 if (SimplifyDemandedBits(Op0,
1765 APInt::getSignMask(Op0.getScalarValueSizeInBits()),
1766 DemandedElts, KnownOp0, TLO, Depth + 1))
1767 return true;
1768 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1769 // width as the setcc result, and (3) the result of a setcc conforms to 0
1770 // or -1, we may be able to bypass the setcc.
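// Illustration: with 0/-1 booleans, (setlt X, 0) yields all-ones exactly
// when the sign bit of X is set, so when only the sign bit is demanded the
// setcc can be replaced by X itself (or by (not X) for >= 0 / > -1).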
1771 if (DemandedBits.isSignMask() &&
1772 Op0.getScalarValueSizeInBits() == BitWidth &&
1773 getBooleanContents(Op0.getValueType()) ==
1774 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1775 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1776 // NOT operation.
1777 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1778 SDLoc DL(Op);
1779 EVT VT = Op0.getValueType();
1780 SDValue NotOp0 = TLO.DAG.getNOT(DL, Op0, VT);
1781 return TLO.CombineTo(Op, NotOp0);
1782 }
1783 return TLO.CombineTo(Op, Op0);
1784 }
1785 }
1786 if (getBooleanContents(Op0.getValueType()) ==
1787 TargetLowering::ZeroOrNegativeOneBooleanContent &&
1788 BitWidth > 1)
1789 Known.Zero.setBitsFrom(1);
1790 break;
1791 }
1792 case ISD::SHL: {
1793 SDValue Op0 = Op.getOperand(0);
1794 SDValue Op1 = Op.getOperand(1);
1795 EVT ShiftVT = Op1.getValueType();
1796
1797 if (std::optional<unsigned> KnownSA =
1798 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1799 unsigned ShAmt = *KnownSA;
1800 if (ShAmt == 0)
1801 return TLO.CombineTo(Op, Op0);
1802
1803 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1804 // single shift. We can do this if the bottom bits (which are shifted
1805 // out) are never demanded.
1806 // TODO - support non-uniform vector amounts.
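// e.g. when none of the demanded bits are among the low bits shifted out:
// (shl (srl x, 5), 3) --> (srl x, 2) and (shl (srl x, 2), 5) --> (shl x, 3).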
1807 if (Op0.getOpcode() == ISD::SRL) {
1808 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1809 if (std::optional<unsigned> InnerSA =
1810 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1811 unsigned C1 = *InnerSA;
1812 unsigned Opc = ISD::SHL;
1813 int Diff = ShAmt - C1;
1814 if (Diff < 0) {
1815 Diff = -Diff;
1816 Opc = ISD::SRL;
1817 }
1818 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1819 return TLO.CombineTo(
1820 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1821 }
1822 }
1823 }
1824
1825 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1826 // are not demanded. This will likely allow the anyext to be folded away.
1827 // TODO - support non-uniform vector amounts.
1828 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1829 SDValue InnerOp = Op0.getOperand(0);
1830 EVT InnerVT = InnerOp.getValueType();
1831 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1832 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1833 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1834 SDValue NarrowShl = TLO.DAG.getNode(
1835 ISD::SHL, dl, InnerVT, InnerOp,
1836 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1837 return TLO.CombineTo(
1838 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1839 }
1840
1841 // Repeat the SHL optimization above in cases where an extension
1842 // intervenes: (shl (anyext (shr x, c1)), c2) to
1843 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1844 // aren't demanded (as above) and that the shifted upper c1 bits of
1845 // x aren't demanded.
1846 // TODO - support non-uniform vector amounts.
1847 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1848 InnerOp.hasOneUse()) {
1849 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1850 InnerOp, DemandedElts, Depth + 2)) {
1851 unsigned InnerShAmt = *SA2;
1852 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1853 DemandedBits.getActiveBits() <=
1854 (InnerBits - InnerShAmt + ShAmt) &&
1855 DemandedBits.countr_zero() >= ShAmt) {
1856 SDValue NewSA =
1857 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1858 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1859 InnerOp.getOperand(0));
1860 return TLO.CombineTo(
1861 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1862 }
1863 }
1864 }
1865 }
1866
1867 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1868 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1869 Depth + 1)) {
1870 // Disable the nsw and nuw flags. We can no longer guarantee that we
1871 // won't wrap after simplification.
1872 Op->dropFlags(SDNodeFlags::NoWrap);
1873 return true;
1874 }
1875 Known <<= ShAmt;
1876 // low bits known zero.
1877 Known.Zero.setLowBits(ShAmt);
1878
1879 // Attempt to avoid multi-use ops if we don't need anything from them.
1880 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1881 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1882 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1883 if (DemandedOp0) {
1884 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1885 return TLO.CombineTo(Op, NewOp);
1886 }
1887 }
1888
1889 // TODO: Can we merge this fold with the one below?
1890 // Try shrinking the operation as long as the shift amount will still be
1891 // in range.
1892 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1893 Op.getNode()->hasOneUse()) {
1894 // Search for the smallest integer type with free casts to and from
1895 // Op's type. For expedience, just check power-of-2 integer types.
1896 unsigned DemandedSize = DemandedBits.getActiveBits();
1897 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1898 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1899 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1900 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1901 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1902 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1903 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1904 assert(DemandedSize <= SmallVTBits &&
1905 "Narrowed below demanded bits?");
1906 // We found a type with free casts.
1907 SDValue NarrowShl = TLO.DAG.getNode(
1908 ISD::SHL, dl, SmallVT,
1909 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1910 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1911 return TLO.CombineTo(
1912 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1913 }
1914 }
1915 }
1916
1917 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1918 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1919 // Only do this if we demand the upper half so the knownbits are correct.
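// e.g. if the upper half of the shift result is known zero (the narrow
// shift is NUW): (shl i64:x, K) --> (zero_extend (shl (trunc i64:x to i32), K)).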
1920 unsigned HalfWidth = BitWidth / 2;
1921 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1922 DemandedBits.countLeadingOnes() >= HalfWidth) {
1923 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1924 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1925 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1926 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1927 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1928 // If we're demanding the upper bits at all, we must ensure
1929 // that the upper bits of the shift result are known to be zero,
1930 // which is equivalent to the narrow shift being NUW.
1931 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1932 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1933 SDNodeFlags Flags;
1934 Flags.setNoSignedWrap(IsNSW);
1935 Flags.setNoUnsignedWrap(IsNUW);
1936 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1937 SDValue NewShiftAmt =
1938 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1939 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1940 NewShiftAmt, Flags);
1941 SDValue NewExt =
1942 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1943 return TLO.CombineTo(Op, NewExt);
1944 }
1945 }
1946 }
1947 } else {
1948 // This is a variable shift, so we can't shift the demand mask by a known
1949 // amount. But if we are not demanding high bits, then we are not
1950 // demanding those bits from the pre-shifted operand either.
1951 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1952 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1953 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1954 Depth + 1)) {
1955 // Disable the nsw and nuw flags. We can no longer guarantee that we
1956 // won't wrap after simplification.
1957 Op->dropFlags(SDNodeFlags::NoWrap);
1958 return true;
1959 }
1960 Known.resetAll();
1961 }
1962 }
1963
1964 // If we are only demanding sign bits then we can use the shift source
1965 // directly.
1966 if (std::optional<unsigned> MaxSA =
1967 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1968 unsigned ShAmt = *MaxSA;
1969 unsigned NumSignBits =
1970 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1971 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1972 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1973 return TLO.CombineTo(Op, Op0);
1974 }
1975 break;
1976 }
1977 case ISD::SRL: {
1978 SDValue Op0 = Op.getOperand(0);
1979 SDValue Op1 = Op.getOperand(1);
1980 EVT ShiftVT = Op1.getValueType();
1981
1982 if (std::optional<unsigned> KnownSA =
1983 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1984 unsigned ShAmt = *KnownSA;
1985 if (ShAmt == 0)
1986 return TLO.CombineTo(Op, Op0);
1987
1988 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1989 // single shift. We can do this if the top bits (which are shifted out)
1990 // are never demanded.
1991 // TODO - support non-uniform vector amounts.
1992 if (Op0.getOpcode() == ISD::SHL) {
1993 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1994 if (std::optional<unsigned> InnerSA =
1995 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1996 unsigned C1 = *InnerSA;
1997 unsigned Opc = ISD::SRL;
1998 int Diff = ShAmt - C1;
1999 if (Diff < 0) {
2000 Diff = -Diff;
2001 Opc = ISD::SHL;
2002 }
2003 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
2004 return TLO.CombineTo(
2005 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
2006 }
2007 }
2008 }
2009
2010 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2011 // single sra. We can do this if the top bits are never demanded.
2012 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2013 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2014 if (std::optional<unsigned> InnerSA =
2015 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2016 unsigned C1 = *InnerSA;
2017 // Clamp the combined shift amount if it exceeds the bit width.
2018 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2019 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2020 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2021 Op0.getOperand(0), NewSA));
2022 }
2023 }
2024 }
2025
2026 APInt InDemandedMask = (DemandedBits << ShAmt);
2027
2028 // If the shift is exact, then it does demand the low bits (and knows that
2029 // they are zero).
2030 if (Op->getFlags().hasExact())
2031 InDemandedMask.setLowBits(ShAmt);
2032
2033 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2034 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2035 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2036 APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
2037 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2038 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2039 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2040 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2041 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2042 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2043 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2044 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2045 SDValue NewShiftAmt =
2046 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2047 SDValue NewShift =
2048 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2049 return TLO.CombineTo(
2050 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2051 }
2052 }
2053
2054 // Compute the new bits that are at the top now.
2055 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2056 Depth + 1))
2057 return true;
2058 Known >>= ShAmt;
2059 // High bits known zero.
2060 Known.Zero.setHighBits(ShAmt);
2061
2062 // Attempt to avoid multi-use ops if we don't need anything from them.
2063 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2064 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2065 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2066 if (DemandedOp0) {
2067 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2068 return TLO.CombineTo(Op, NewOp);
2069 }
2070 }
2071 } else {
2072 // Use generic knownbits computation as it has support for non-uniform
2073 // shift amounts.
2074 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2075 }
2076
2077 // If we are only demanding sign bits then we can use the shift source
2078 // directly.
2079 if (std::optional<unsigned> MaxSA =
2080 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2081 unsigned ShAmt = *MaxSA;
2082 // Must already be signbits in DemandedBits bounds, and can't demand any
2083 // shifted in zeroes.
2084 if (DemandedBits.countl_zero() >= ShAmt) {
2085 unsigned NumSignBits =
2086 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2087 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2088 return TLO.CombineTo(Op, Op0);
2089 }
2090 }
2091
2092 // Try to match AVG patterns (after shift simplification).
2093 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2094 DemandedElts, Depth + 1))
2095 return TLO.CombineTo(Op, AVG);
2096
2097 break;
2098 }
2099 case ISD::SRA: {
2100 SDValue Op0 = Op.getOperand(0);
2101 SDValue Op1 = Op.getOperand(1);
2102 EVT ShiftVT = Op1.getValueType();
2103
2104 // If we only want bits that already match the signbit then we don't need
2105 // to shift.
2106 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2107 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2108 NumHiDemandedBits)
2109 return TLO.CombineTo(Op, Op0);
2110
2111 // If this is an arithmetic shift right and only the low-bit is set, we can
2112 // always convert this into a logical shr, even if the shift amount is
2113 // variable. The low bit of the shift cannot be an input sign bit unless
2114 // the shift amount is >= the size of the datatype, which is undefined.
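// e.g. (sra i8:x, C) and (srl i8:x, C) agree on bit 0 for any C in [0, 7].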
2115 if (DemandedBits.isOne())
2116 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2117
2118 if (std::optional<unsigned> KnownSA =
2119 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2120 unsigned ShAmt = *KnownSA;
2121 if (ShAmt == 0)
2122 return TLO.CombineTo(Op, Op0);
2123
2124 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2125 // supports sext_inreg.
2126 if (Op0.getOpcode() == ISD::SHL) {
2127 if (std::optional<unsigned> InnerSA =
2128 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2129 unsigned LowBits = BitWidth - ShAmt;
2130 EVT ExtVT = VT.changeElementType(
2131 *TLO.DAG.getContext(),
2132 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2133
2134 if (*InnerSA == ShAmt) {
2135 if (!TLO.LegalOperations() ||
2136 isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))
2137 return TLO.CombineTo(
2138 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2139 Op0.getOperand(0),
2140 TLO.DAG.getValueType(ExtVT)));
2141
2142 // Even if we can't convert to sext_inreg, we might be able to
2143 // remove this shift pair if the input is already sign extended.
2144 unsigned NumSignBits =
2145 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2146 if (NumSignBits > ShAmt)
2147 return TLO.CombineTo(Op, Op0.getOperand(0));
2148 }
2149 }
2150 }
2151
2152 APInt InDemandedMask = (DemandedBits << ShAmt);
2153
2154 // If the shift is exact, then it does demand the low bits (and knows that
2155 // they are zero).
2156 if (Op->getFlags().hasExact())
2157 InDemandedMask.setLowBits(ShAmt);
2158
2159 // If any of the demanded bits are produced by the sign extension, we also
2160 // demand the input sign bit.
2161 if (DemandedBits.countl_zero() < ShAmt)
2162 InDemandedMask.setSignBit();
2163
2164 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2165 Depth + 1))
2166 return true;
2167 Known >>= ShAmt;
2168
2169 // If the input sign bit is known to be zero, or if none of the top bits
2170 // are demanded, turn this into an unsigned shift right.
2171 if (Known.Zero[BitWidth - ShAmt - 1] ||
2172 DemandedBits.countl_zero() >= ShAmt) {
2173 SDNodeFlags Flags;
2174 Flags.setExact(Op->getFlags().hasExact());
2175 return TLO.CombineTo(
2176 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2177 }
2178
2179 int Log2 = DemandedBits.exactLogBase2();
2180 if (Log2 >= 0) {
2181 // The bit must come from the sign.
2182 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2183 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2184 }
2185
2186 if (Known.One[BitWidth - ShAmt - 1])
2187 // New bits are known one.
2188 Known.One.setHighBits(ShAmt);
2189
2190 // Attempt to avoid multi-use ops if we don't need anything from them.
2191 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2192 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2193 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2194 if (DemandedOp0) {
2195 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2196 return TLO.CombineTo(Op, NewOp);
2197 }
2198 }
2199 }
2200
2201 // Try to match AVG patterns (after shift simplification).
2202 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2203 DemandedElts, Depth + 1))
2204 return TLO.CombineTo(Op, AVG);
2205
2206 break;
2207 }
2208 case ISD::FSHL:
2209 case ISD::FSHR: {
2210 SDValue Op0 = Op.getOperand(0);
2211 SDValue Op1 = Op.getOperand(1);
2212 SDValue Op2 = Op.getOperand(2);
2213 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2214
2215 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2216 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2217
2218 // For fshl, 0-shift returns the 1st arg.
2219 // For fshr, 0-shift returns the 2nd arg.
2220 if (Amt == 0) {
2221 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2222 Known, TLO, Depth + 1))
2223 return true;
2224 break;
2225 }
2226
2227 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2228 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
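// e.g. i8 fshl(Op0, Op1, 3) == (Op0 << 3) | (Op1 >> 5), so demanding
// 0b00010110 of the result demands 0b00000010 of Op0 and 0b11000000 of Op1.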
2229 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2230 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2231 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2232 Depth + 1))
2233 return true;
2234 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2235 Depth + 1))
2236 return true;
2237
2238 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2239 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2240 Known = Known.unionWith(Known2);
2241
2242 // Attempt to avoid multi-use ops if we don't need anything from them.
2243 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2244 !DemandedElts.isAllOnes()) {
2245 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2246 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2247 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2248 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2249 if (DemandedOp0 || DemandedOp1) {
2250 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2251 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2252 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2253 DemandedOp1, Op2);
2254 return TLO.CombineTo(Op, NewOp);
2255 }
2256 }
2257 }
2258
2259 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2260 if (isPowerOf2_32(BitWidth)) {
2261 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2262 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2263 Known2, TLO, Depth + 1))
2264 return true;
2265 }
2266 break;
2267 }
2268 case ISD::ROTL:
2269 case ISD::ROTR: {
2270 SDValue Op0 = Op.getOperand(0);
2271 SDValue Op1 = Op.getOperand(1);
2272 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2273
2274 // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2275 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2276 return TLO.CombineTo(Op, Op0);
2277
2278 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2279 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2280 unsigned RevAmt = BitWidth - Amt;
2281
2282 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2283 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
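// e.g. i32 rotl(x, 8) moves bit i of x to bit (i + 8) % 32, so demanding
// 0x0000FFFF of the result demands rotr(0x0000FFFF, 8) == 0xFF0000FF of x.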
2284 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2285 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2286 Depth + 1))
2287 return true;
2288
2289 // rot*(x, 0) --> x
2290 if (Amt == 0)
2291 return TLO.CombineTo(Op, Op0);
2292
2293 // See if we don't demand either half of the rotated bits.
2294 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2295 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2296 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2297 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2298 }
2299 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2300 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2301 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2302 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2303 }
2304 }
2305
2306 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2307 if (isPowerOf2_32(BitWidth)) {
2308 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2309 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2310 Depth + 1))
2311 return true;
2312 }
2313 break;
2314 }
2315 case ISD::SMIN:
2316 case ISD::SMAX:
2317 case ISD::UMIN:
2318 case ISD::UMAX: {
2319 unsigned Opc = Op.getOpcode();
2320 SDValue Op0 = Op.getOperand(0);
2321 SDValue Op1 = Op.getOperand(1);
2322
2323 // If we're only demanding signbits, then we can simplify to OR/AND node.
2324 unsigned BitOp =
2325 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2326 unsigned NumSignBits =
2327 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2328 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2329 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2330 if (NumSignBits >= NumDemandedUpperBits)
2331 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2332
2333 // Check if one arg is always less/greater than (or equal to) the other arg.
2334 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2335 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2336 switch (Opc) {
2337 case ISD::SMIN:
2338 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2339 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2340 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2341 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2342 Known = KnownBits::smin(Known0, Known1);
2343 break;
2344 case ISD::SMAX:
2345 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2346 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2347 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2348 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2349 Known = KnownBits::smax(Known0, Known1);
2350 break;
2351 case ISD::UMIN:
2352 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2353 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2354 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2355 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2356 Known = KnownBits::umin(Known0, Known1);
2357 break;
2358 case ISD::UMAX:
2359 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2360 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2361 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2362 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2363 Known = KnownBits::umax(Known0, Known1);
2364 break;
2365 }
2366 break;
2367 }
2368 case ISD::BITREVERSE: {
2369 SDValue Src = Op.getOperand(0);
2370 APInt DemandedSrcBits = DemandedBits.reverseBits();
2371 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2372 Depth + 1))
2373 return true;
2374 Known = Known2.reverseBits();
2375 break;
2376 }
2377 case ISD::BSWAP: {
2378 SDValue Src = Op.getOperand(0);
2379
2380 // If the only bits demanded come from one byte of the bswap result,
2381 // just shift the input byte into position to eliminate the bswap.
2382 unsigned NLZ = DemandedBits.countl_zero();
2383 unsigned NTZ = DemandedBits.countr_zero();
2384
2385 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2386 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2387 // have 14 leading zeros, round to 8.
2388 NLZ = alignDown(NLZ, 8);
2389 NTZ = alignDown(NTZ, 8);
2390 // If we need exactly one byte, we can do this transformation.
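// e.g. i32 bswap with DemandedBits == 0x0000FF00: NLZ == 16, NTZ == 8; the
// demanded byte comes from bits 23:16 of Src, so (srl Src, 8) supplies it.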
2391 if (BitWidth - NLZ - NTZ == 8) {
2392 // Replace this with either a left or right shift to get the byte into
2393 // the right place.
2394 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2395 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2396 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2397 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2398 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2399 return TLO.CombineTo(Op, NewOp);
2400 }
2401 }
2402
2403 APInt DemandedSrcBits = DemandedBits.byteSwap();
2404 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2405 Depth + 1))
2406 return true;
2407 Known = Known2.byteSwap();
2408 break;
2409 }
2410 case ISD::CTPOP: {
2411 // If only 1 bit is demanded, replace with PARITY as long as we're before
2412 // op legalization.
2413 // FIXME: Limit to scalars for now.
2414 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2415 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2416 Op.getOperand(0)));
2417
2418 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2419 break;
2420 }
2421 case ISD::SIGN_EXTEND_INREG: {
2422 SDValue Op0 = Op.getOperand(0);
2423 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2424 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2425
2426 // If we only care about the highest bit, don't bother shifting right.
2427 if (DemandedBits.isSignMask()) {
2428 unsigned MinSignedBits =
2429 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2430 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2431 // However if the input is already sign extended we expect the sign
2432 // extension to be dropped altogether later and do not simplify.
2433 if (!AlreadySignExtended) {
2434 // Compute the correct shift amount type, which must be getShiftAmountTy
2435 // for scalar types after legalization.
2436 SDValue ShiftAmt =
2437 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2438 return TLO.CombineTo(Op,
2439 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2440 }
2441 }
2442
2443 // If none of the extended bits are demanded, eliminate the sextinreg.
2444 if (DemandedBits.getActiveBits() <= ExVTBits)
2445 return TLO.CombineTo(Op, Op0);
2446
2447 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2448
2449 // Since the sign extended bits are demanded, we know that the sign
2450 // bit is demanded.
2451 InputDemandedBits.setBit(ExVTBits - 1);
2452
2453 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2454 Depth + 1))
2455 return true;
2456
2457 // If the sign bit of the input is known set or clear, then we know the
2458 // top bits of the result.
2459
2460 // If the input sign bit is known zero, convert this into a zero extension.
2461 if (Known.Zero[ExVTBits - 1])
2462 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2463
2464 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2465 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2466 Known.One.setBitsFrom(ExVTBits);
2467 Known.Zero &= Mask;
2468 } else { // Input sign bit unknown
2469 Known.Zero &= Mask;
2470 Known.One &= Mask;
2471 }
2472 break;
2473 }
2474 case ISD::BUILD_PAIR: {
2475 EVT HalfVT = Op.getOperand(0).getValueType();
2476 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2477
2478 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2479 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2480
2481 KnownBits KnownLo, KnownHi;
2482
2483 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2484 return true;
2485
2486 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2487 return true;
2488
2489 Known = KnownHi.concat(KnownLo);
2490 break;
2491 }
2492 case ISD::ZERO_EXTEND_VECTOR_INREG:
2493 if (VT.isScalableVector())
2494 return false;
2495 [[fallthrough]];
2496 case ISD::ZERO_EXTEND: {
2497 SDValue Src = Op.getOperand(0);
2498 EVT SrcVT = Src.getValueType();
2499 unsigned InBits = SrcVT.getScalarSizeInBits();
2500 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2501 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2502
2503 // If none of the top bits are demanded, convert this into an any_extend.
2504 if (DemandedBits.getActiveBits() <= InBits) {
2505 // If we only need the non-extended bits of the bottom element
2506 // then we can just bitcast to the result.
2507 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2508 VT.getSizeInBits() == SrcVT.getSizeInBits())
2509 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2510
2511 unsigned Opc =
2512 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2513 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2514 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2515 }
2516
2517 APInt InDemandedBits = DemandedBits.trunc(InBits);
2518 APInt InDemandedElts = DemandedElts.zext(InElts);
2519 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2520 Depth + 1)) {
2521 Op->dropFlags(SDNodeFlags::NonNeg);
2522 return true;
2523 }
2524 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2525 Known = Known.zext(BitWidth);
2526
2527 // Attempt to avoid multi-use ops if we don't need anything from them.
2528 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2529 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2530 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2531 break;
2532 }
2533 case ISD::SIGN_EXTEND_VECTOR_INREG:
2534 if (VT.isScalableVector())
2535 return false;
2536 [[fallthrough]];
2537 case ISD::SIGN_EXTEND: {
2538 SDValue Src = Op.getOperand(0);
2539 EVT SrcVT = Src.getValueType();
2540 unsigned InBits = SrcVT.getScalarSizeInBits();
2541 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2542 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2543
2544 APInt InDemandedElts = DemandedElts.zext(InElts);
2545 APInt InDemandedBits = DemandedBits.trunc(InBits);
2546
2547 // Since some of the sign extended bits are demanded, we know that the sign
2548 // bit is demanded.
2549 InDemandedBits.setBit(InBits - 1);
2550
2551 // If none of the top bits are demanded, convert this into an any_extend.
2552 if (DemandedBits.getActiveBits() <= InBits) {
2553 // If we only need the non-extended bits of the bottom element
2554 // then we can just bitcast to the result.
2555 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2556 VT.getSizeInBits() == SrcVT.getSizeInBits())
2557 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2558
2559 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2560 if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2561 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2562 InBits) {
2563 unsigned Opc =
2564 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2565 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2566 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2567 }
2568 }
2569
2570 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2571 Depth + 1))
2572 return true;
2573 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2574
2575 // If the sign bit is known one, the top bits match.
2576 Known = Known.sext(BitWidth);
2577
2578 // If the sign bit is known zero, convert this to a zero extend.
2579 if (Known.isNonNegative()) {
2580 unsigned Opc =
2581 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2582 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2583 SDNodeFlags Flags;
2584 if (!IsVecInReg)
2585 Flags |= SDNodeFlags::NonNeg;
2586 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2587 }
2588 }
2589
2590 // Attempt to avoid multi-use ops if we don't need anything from them.
2591 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2592 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2593 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2594 break;
2595 }
2596 case ISD::ANY_EXTEND_VECTOR_INREG:
2597 if (VT.isScalableVector())
2598 return false;
2599 [[fallthrough]];
2600 case ISD::ANY_EXTEND: {
2601 SDValue Src = Op.getOperand(0);
2602 EVT SrcVT = Src.getValueType();
2603 unsigned InBits = SrcVT.getScalarSizeInBits();
2604 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2605 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2606
2607 // If we only need the bottom element then we can just bitcast.
2608 // TODO: Handle ANY_EXTEND?
2609 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2610 VT.getSizeInBits() == SrcVT.getSizeInBits())
2611 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2612
2613 APInt InDemandedBits = DemandedBits.trunc(InBits);
2614 APInt InDemandedElts = DemandedElts.zext(InElts);
2615 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2616 Depth + 1))
2617 return true;
2618 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2619 Known = Known.anyext(BitWidth);
2620
2621 // Attempt to avoid multi-use ops if we don't need anything from them.
2622 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2623 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2624 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2625 break;
2626 }
2627 case ISD::TRUNCATE: {
2628 SDValue Src = Op.getOperand(0);
2629
2630 // Simplify the input, using demanded bit information, and compute the known
2631 // zero/one bits live out.
2632 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2633 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2634 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2635 Depth + 1)) {
2636 // Disable the nsw and nuw flags. We can no longer guarantee that we
2637 // won't wrap after simplification.
2638 Op->dropFlags(SDNodeFlags::NoWrap);
2639 return true;
2640 }
2641 Known = Known.trunc(BitWidth);
2642
2643 // Attempt to avoid multi-use ops if we don't need anything from them.
2644 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2645 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2646 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2647
2648 // If the input is only used by this truncate, see if we can shrink it based
2649 // on the known demanded bits.
2650 switch (Src.getOpcode()) {
2651 default:
2652 break;
2653 case ISD::SRL:
2654 // Shrink SRL by a constant if none of the high bits shifted in are
2655 // demanded.
2656 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2657 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2658 // undesirable.
2659 break;
2660
2661 if (Src.getNode()->hasOneUse()) {
2662 if (isTruncateFree(Src, VT) &&
2663 !isTruncateFree(Src.getValueType(), VT)) {
2664 // If the truncate is only free at trunc(srl), do not turn it into
2665 // srl(trunc). The check first verifies that the truncate is free at
2666 // Src's opcode (srl), then verifies that the truncate is not done by
2667 // referencing a sub-register. In testing, if both trunc(srl) and
2668 // srl(trunc) are free, srl(trunc) performs better; if only trunc(srl)
2669 // is free, trunc(srl) is better.
2670 break;
2671 }
2672
2673 std::optional<unsigned> ShAmtC =
2674 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2675 if (!ShAmtC || *ShAmtC >= BitWidth)
2676 break;
2677 unsigned ShVal = *ShAmtC;
2678
2679 APInt HighBits =
2680 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2681 HighBits.lshrInPlace(ShVal);
2682 HighBits = HighBits.trunc(BitWidth);
2683 if (!(HighBits & DemandedBits)) {
2684 // None of the shifted in bits are needed. Add a truncate of the
2685 // shift input, then shift it.
2686 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2687 SDValue NewTrunc =
2688 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2689 return TLO.CombineTo(
2690 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2691 }
2692 }
2693 break;
2694 }
2695
2696 break;
2697 }
2698 case ISD::AssertZext: {
2699 // AssertZext demands all of the high bits, plus any of the low bits
2700 // demanded by its users.
2701 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2702 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2703 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2704 TLO, Depth + 1))
2705 return true;
2706
2707 Known.Zero |= ~InMask;
2708 Known.One &= (~Known.Zero);
2709 break;
2710 }
2711 case ISD::EXTRACT_VECTOR_ELT: {
2712 SDValue Src = Op.getOperand(0);
2713 SDValue Idx = Op.getOperand(1);
2714 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2715 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2716
2717 if (SrcEltCnt.isScalable())
2718 return false;
2719
2720 // Demand the bits from every vector element without a constant index.
2721 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2722 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2723 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2724 if (CIdx->getAPIntValue().ult(NumSrcElts))
2725 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2726
2727 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2728 // anything about the extended bits.
2729 APInt DemandedSrcBits = DemandedBits;
2730 if (BitWidth > EltBitWidth)
2731 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2732
2733 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2734 Depth + 1))
2735 return true;
2736
2737 // Attempt to avoid multi-use ops if we don't need anything from them.
2738 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2739 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2740 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2741 SDValue NewOp =
2742 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2743 return TLO.CombineTo(Op, NewOp);
2744 }
2745 }
2746
2747 Known = Known2;
2748 if (BitWidth > EltBitWidth)
2749 Known = Known.anyext(BitWidth);
2750 break;
2751 }
2752 case ISD::BITCAST: {
2753 if (VT.isScalableVector())
2754 return false;
2755 SDValue Src = Op.getOperand(0);
2756 EVT SrcVT = Src.getValueType();
2757 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2758
2759 // If this is an FP->Int bitcast and if the sign bit is the only
2760 // thing demanded, turn this into a FGETSIGN.
2761 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2762 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2763 SrcVT.isFloatingPoint()) {
2764 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2765 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2766 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2767 SrcVT != MVT::f128) {
2768 // Cannot eliminate/lower SHL for f128 yet.
2769 EVT Ty = OpVTLegal ? VT : MVT::i32;
2770 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2771 // place. We expect the SHL to be eliminated by other optimizations.
2772 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2773 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2774 if (!OpVTLegal && OpVTSizeInBits > 32)
2775 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2776 unsigned ShVal = Op.getValueSizeInBits() - 1;
2777 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2778 return TLO.CombineTo(Op,
2779 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2780 }
2781 }
2782
2783 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2784 // Demand the elt/bit if any of the original elts/bits are demanded.
2785 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2786 unsigned Scale = BitWidth / NumSrcEltBits;
2787 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2788 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2789 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2790 for (unsigned i = 0; i != Scale; ++i) {
2791 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2792 unsigned BitOffset = EltOffset * NumSrcEltBits;
2793 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2794 if (!Sub.isZero()) {
2795 DemandedSrcBits |= Sub;
2796 for (unsigned j = 0; j != NumElts; ++j)
2797 if (DemandedElts[j])
2798 DemandedSrcElts.setBit((j * Scale) + i);
2799 }
2800 }
2801
2802 APInt KnownSrcUndef, KnownSrcZero;
2803 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2804 KnownSrcZero, TLO, Depth + 1))
2805 return true;
2806
2807 KnownBits KnownSrcBits;
2808 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2809 KnownSrcBits, TLO, Depth + 1))
2810 return true;
2811 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2812 // TODO - bigendian once we have test coverage.
2813 unsigned Scale = NumSrcEltBits / BitWidth;
2814 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2815 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2816 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2817 for (unsigned i = 0; i != NumElts; ++i)
2818 if (DemandedElts[i]) {
2819 unsigned Offset = (i % Scale) * BitWidth;
2820 DemandedSrcBits.insertBits(DemandedBits, Offset);
2821 DemandedSrcElts.setBit(i / Scale);
2822 }
2823
2824 if (SrcVT.isVector()) {
2825 APInt KnownSrcUndef, KnownSrcZero;
2826 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2827 KnownSrcZero, TLO, Depth + 1))
2828 return true;
2829 }
2830
2831 KnownBits KnownSrcBits;
2832 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2833 KnownSrcBits, TLO, Depth + 1))
2834 return true;
2835
2836 // Attempt to avoid multi-use ops if we don't need anything from them.
2837 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2838 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2839 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2840 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2841 return TLO.CombineTo(Op, NewOp);
2842 }
2843 }
2844 }
2845
2846 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2847 // recursive call where Known may be useful to the caller.
2848 if (Depth > 0) {
2849 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2850 return false;
2851 }
2852 break;
2853 }
2854 case ISD::MUL:
2855 if (DemandedBits.isPowerOf2()) {
2856 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2857 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2858 // odd (has LSB set), then the left-shifted low bit of X is the answer.
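// e.g. demanding only bit 4 of (X * 48): 48 == 3 << 4 with 3 odd, so bit 4
// of the product equals bit 0 of X, which is exactly what (X << 4) yields.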
2859 unsigned CTZ = DemandedBits.countr_zero();
2860 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2861 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2862 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2863 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2864 return TLO.CombineTo(Op, Shl);
2865 }
2866 }
2867 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2868 // X * X is odd iff X is odd, and
2869 // bit 1 of X * X is always 0, since X * X == X[0] (mod 4).
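// e.g. X == 5: X * X == 25 == 0b11001 (bits [1:0] == 01 == X & 1);
// X == 6: X * X == 36 == 0b100100 (bits [1:0] == 00 == X & 1).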
2870 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2871 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2872 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2873 return TLO.CombineTo(Op, And1);
2874 }
2875 [[fallthrough]];
2876 case ISD::PTRADD:
2877 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2878 break;
2879 // PTRADD behaves like ADD if pointers are represented as integers.
2880 [[fallthrough]];
2881 case ISD::ADD:
2882 case ISD::SUB: {
2883 // Add, Sub, and Mul don't demand any bits in positions beyond that
2884 // of the highest bit demanded of them.
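// (Carries only propagate upward, so the low N bits of the result depend
// only on the low N bits of the operands.)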
2885 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2886 SDNodeFlags Flags = Op.getNode()->getFlags();
2887 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2888 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2889 KnownBits KnownOp0, KnownOp1;
2890 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2891 const KnownBits &KnownRHS) {
2892 if (Op.getOpcode() == ISD::MUL)
2893 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2894 return Demanded;
2895 };
2896 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2897 Depth + 1) ||
2898 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2899 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2900 // See if the operation should be performed at a smaller bit width.
2901 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2902 // Disable the nsw and nuw flags. We can no longer guarantee that we
2903 // won't wrap after simplification.
2904 Op->dropFlags(SDNodeFlags::NoWrap);
2905 return true;
2906 }
2907
2908 // neg x with only low bit demanded is simply x.
2909 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2910 isNullConstant(Op0))
2911 return TLO.CombineTo(Op, Op1);
2912
2913 // Attempt to avoid multi-use ops if we don't need anything from them.
2914 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2915 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2916 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2917 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2918 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2919 if (DemandedOp0 || DemandedOp1) {
2920 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2921 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2922 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2923 Flags & ~SDNodeFlags::NoWrap);
2924 return TLO.CombineTo(Op, NewOp);
2925 }
2926 }
2927
2928 // If we have a constant operand, we may be able to turn it into -1 if we
2929 // do not demand the high bits. This can make the constant smaller to
2930 // encode, allow more general folding, or match specialized instruction
2931 // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2932 // is probably not useful (and could be detrimental).
2933 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
2934 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2935 if (C && !C->isAllOnes() && !C->isOne() &&
2936 (C->getAPIntValue() | HighMask).isAllOnes()) {
2937 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2938 // Disable the nsw and nuw flags. We can no longer guarantee that we
2939 // won't wrap after simplification.
2940 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2941 Flags & ~SDNodeFlags::NoWrap);
2942 return TLO.CombineTo(Op, NewOp);
2943 }
2944
2945 // Match a multiply with a disguised negated-power-of-2 and convert it to
2946 // an equivalent shift-left amount.
2947 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
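// e.g. i8 with only the low 5 bits demanded (HighMask == 0xE0): MulC == 28
// gives 28 | 0xE0 == 0xFC == -4, and 28 == -4 (mod 32), so
// (X * 28) + Op1 --> Op1 - (X << 2) on the demanded bits.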
2948 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2949 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2950 return 0;
2951
2952 // Don't touch opaque constants. Also, ignore zero and power-of-2
2953 // multiplies. Those will get folded later.
2954 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2955 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2956 !MulC->getAPIntValue().isPowerOf2()) {
2957 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2958 if (UnmaskedC.isNegatedPowerOf2())
2959 return (-UnmaskedC).logBase2();
2960 }
2961 return 0;
2962 };
2963
2964 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2965 unsigned ShlAmt) {
2966 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2967 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2968 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2969 return TLO.CombineTo(Op, Res);
2970 };
2971
2972 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2973 if (Op.getOpcode() == ISD::ADD) {
2974 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2975 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2976 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2977 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2978 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2979 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2980 }
2981 if (Op.getOpcode() == ISD::SUB) {
2982 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2983 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2984 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2985 }
2986 }
2987
2988 if (Op.getOpcode() == ISD::MUL) {
2989 Known = KnownBits::mul(KnownOp0, KnownOp1);
2990 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
2991 Known = KnownBits::computeForAddSub(
2992 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
2993 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2994 }
2995 break;
2996 }
2997 case ISD::FABS: {
2998 SDValue Op0 = Op.getOperand(0);
2999 APInt SignMask = APInt::getSignMask(BitWidth);
3000
3001 if (!DemandedBits.intersects(SignMask))
3002 return TLO.CombineTo(Op, Op0);
3003
3004 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3005 Depth + 1))
3006 return true;
3007
3008 if (Known.isNonNegative())
3009 return TLO.CombineTo(Op, Op0);
3010 if (Known.isNegative())
3011 return TLO.CombineTo(
3012 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3013
3014 Known.Zero |= SignMask;
3015 Known.One &= ~SignMask;
3016
3017 break;
3018 }
3019 case ISD::FCOPYSIGN: {
3020 SDValue Op0 = Op.getOperand(0);
3021 SDValue Op1 = Op.getOperand(1);
3022
3023 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3024 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3025 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3026 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3027
3028 if (!DemandedBits.intersects(SignMask0))
3029 return TLO.CombineTo(Op, Op0);
3030
3031 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3032 Known, TLO, Depth + 1) ||
3033 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3034 Depth + 1))
3035 return true;
3036
3037 if (Known2.isNonNegative())
3038 return TLO.CombineTo(
3039 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3040
3041 if (Known2.isNegative())
3042 return TLO.CombineTo(
3043 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3044 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3045
3046 Known.Zero &= ~SignMask0;
3047 Known.One &= ~SignMask0;
3048 break;
3049 }
3050 case ISD::FNEG: {
3051 SDValue Op0 = Op.getOperand(0);
3052 APInt SignMask = APInt::getSignMask(BitWidth);
3053
3054 if (!DemandedBits.intersects(SignMask))
3055 return TLO.CombineTo(Op, Op0);
3056
3057 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3058 Depth + 1))
3059 return true;
3060
3061 if (!Known.isSignUnknown()) {
3062 Known.Zero ^= SignMask;
3063 Known.One ^= SignMask;
3064 }
3065
3066 break;
3067 }
3068 default:
3069 // We also ask the target about intrinsics (which could be specific to it).
3070 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3071 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3072 // TODO: Probably okay to remove after audit; here to reduce change size
3073 // in initial enablement patch for scalable vectors
3074 if (Op.getValueType().isScalableVector())
3075 break;
3076 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
3077 Known, TLO, Depth))
3078 return true;
3079 break;
3080 }
3081
3082 // Just use computeKnownBits to compute output bits.
3083 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3084 break;
3085 }
3086
3087 // If we know the value of all of the demanded bits, return this as a
3088 // constant.
3089 if (!isTargetCanonicalConstantNode(Op) &&
3090 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3091 // Avoid folding to a constant if any OpaqueConstant is involved.
3092 if (llvm::any_of(Op->ops(), [](SDValue V) {
3093 auto *C = dyn_cast<ConstantSDNode>(V);
3094 return C && C->isOpaque();
3095 }))
3096 return false;
3097 if (VT.isInteger())
3098 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3099 if (VT.isFloatingPoint())
3100 return TLO.CombineTo(
3101 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3102 dl, VT));
3103 }
3104
3105 // A multi-use 'all demanded elts' simplify failed to find any known bits.
3106 // Try again just for the original demanded elts.
3107 // Ensure we do this AFTER constant folding above.
3108 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3109 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3110
3111 return false;
3112}
3113
3114 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
3115 const APInt &DemandedElts,
3116 DAGCombinerInfo &DCI) const {
3117 SelectionDAG &DAG = DCI.DAG;
3118 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3119 !DCI.isBeforeLegalizeOps());
3120
3121 APInt KnownUndef, KnownZero;
3122 bool Simplified =
3123 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3124 if (Simplified) {
3125 DCI.AddToWorklist(Op.getNode());
3126 DCI.CommitTargetLoweringOpt(TLO);
3127 }
3128
3129 return Simplified;
3130}
3131
3132/// Given a vector binary operation and known undefined elements for each input
3133/// operand, compute whether each element of the output is undefined.
3134 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
3135 const APInt &UndefOp0,
3136 const APInt &UndefOp1) {
3137 EVT VT = BO.getValueType();
3138 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
3139 "Vector binop only");
3140
3141 EVT EltVT = VT.getVectorElementType();
3142 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3143 assert(UndefOp0.getBitWidth() == NumElts &&
3144 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3145
3146 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3147 const APInt &UndefVals) {
3148 if (UndefVals[Index])
3149 return DAG.getUNDEF(EltVT);
3150
3151 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3152 // Try hard to make sure that the getNode() call is not creating temporary
3153 // nodes. Ignore opaque integers because they do not constant fold.
3154 SDValue Elt = BV->getOperand(Index);
3155 auto *C = dyn_cast<ConstantSDNode>(Elt);
3156 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3157 return Elt;
3158 }
3159
3160 return SDValue();
3161 };
3162
3163 APInt KnownUndef = APInt::getZero(NumElts);
3164 for (unsigned i = 0; i != NumElts; ++i) {
3165 // If both inputs for this element are either constant or undef and match
3166 // the element type, compute the constant/undef result for this element of
3167 // the vector.
3168 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3169 // not handle FP constants. The code within getNode() should be refactored
3170 // to avoid the danger of creating a bogus temporary node here.
3171 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3172 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3173 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3174 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3175 KnownUndef.setBit(i);
3176 }
3177 return KnownUndef;
3178}
3179
3180 bool TargetLowering::SimplifyDemandedVectorElts(
3181 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3182 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3183 bool AssumeSingleUse) const {
3184 EVT VT = Op.getValueType();
3185 unsigned Opcode = Op.getOpcode();
3186 APInt DemandedElts = OriginalDemandedElts;
3187 unsigned NumElts = DemandedElts.getBitWidth();
3188 assert(VT.isVector() && "Expected vector op");
3189
3190 KnownUndef = KnownZero = APInt::getZero(NumElts);
3191
3192 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3193 return false;
3194
3195 // TODO: For now we assume we know nothing about scalable vectors.
3196 if (VT.isScalableVector())
3197 return false;
3198
3199 assert(VT.getVectorNumElements() == NumElts &&
3200 "Mask size mismatches value type element count!");
3201
3202 // Undef operand.
3203 if (Op.isUndef()) {
3204 KnownUndef.setAllBits();
3205 return false;
3206 }
3207
3208 // If Op has other users, assume that all elements are needed.
3209 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3210 DemandedElts.setAllBits();
3211
3212 // Not demanding any elements from Op.
3213 if (DemandedElts == 0) {
3214 KnownUndef.setAllBits();
3215 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3216 }
3217
3218 // Limit search depth.
3219 if (Depth >= SelectionDAG::MaxRecursionDepth)
3220 return false;
3221
3222 SDLoc DL(Op);
3223 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3224 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3225
3226 // Helper for demanding the specified elements and all the bits of both binary
3227 // operands.
3228 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3229 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3230 TLO.DAG, Depth + 1);
3231 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3232 TLO.DAG, Depth + 1);
3233 if (NewOp0 || NewOp1) {
3234 SDValue NewOp =
3235 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3236 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3237 return TLO.CombineTo(Op, NewOp);
3238 }
3239 return false;
3240 };
3241
3242 switch (Opcode) {
3243 case ISD::SCALAR_TO_VECTOR: {
3244 if (!DemandedElts[0]) {
3245 KnownUndef.setAllBits();
3246 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3247 }
3248 KnownUndef.setHighBits(NumElts - 1);
3249 break;
3250 }
3251 case ISD::BITCAST: {
3252 SDValue Src = Op.getOperand(0);
3253 EVT SrcVT = Src.getValueType();
3254
3255 if (!SrcVT.isVector()) {
3256 // TODO - bigendian once we have test coverage.
3257 if (IsLE) {
3258 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3259 unsigned EltSize = VT.getScalarSizeInBits();
3260 for (unsigned I = 0; I != NumElts; ++I) {
3261 if (DemandedElts[I]) {
3262 unsigned Offset = I * EltSize;
3263 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3264 }
3265 }
3266 KnownBits Known;
3267 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3268 return true;
3269 }
3270 break;
3271 }
3272
3273 // Fast handling of 'identity' bitcasts.
3274 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3275 if (NumSrcElts == NumElts)
3276 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3277 KnownZero, TLO, Depth + 1);
3278
3279 APInt SrcDemandedElts, SrcZero, SrcUndef;
3280
3281 // Bitcast from a 'large element' src vector to a 'small element' vector: we
3282 // must demand a source element if any DemandedElt maps to it.
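// e.g. for a v2i64 -> v4i32 bitcast, Scale = 2 and demanding i32 element 1
// demands i64 source element 0, since output elements 0 and 1 both map to it.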
3283 if ((NumElts % NumSrcElts) == 0) {
3284 unsigned Scale = NumElts / NumSrcElts;
3285 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3286 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3287 TLO, Depth + 1))
3288 return true;
3289
3290 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3291 // of the large element.
3292 // TODO - bigendian once we have test coverage.
3293 if (IsLE) {
3294 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3295 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3296 for (unsigned i = 0; i != NumElts; ++i)
3297 if (DemandedElts[i]) {
3298 unsigned Ofs = (i % Scale) * EltSizeInBits;
3299 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3300 }
3301
3302 KnownBits Known;
3303 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3304 TLO, Depth + 1))
3305 return true;
3306
3307 // The bitcast has split each wide element into a number of
3308 // narrow subelements. We have just computed the Known bits
3309 // for wide elements. See if element splitting results in
3310 // some subelements being zero. Only for demanded elements!
3311 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3312 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3313 .isAllOnes())
3314 continue;
3315 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3316 unsigned Elt = Scale * SrcElt + SubElt;
3317 if (DemandedElts[Elt])
3318 KnownZero.setBit(Elt);
3319 }
3320 }
3321 }
3322
3323 // If the src element is zero/undef then all the output elements covering it
3324 // will be too - only demanded elements are guaranteed to be correct.
3325 for (unsigned i = 0; i != NumSrcElts; ++i) {
3326 if (SrcDemandedElts[i]) {
3327 if (SrcZero[i])
3328 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3329 if (SrcUndef[i])
3330 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3331 }
3332 }
3333 }
3334
3335 // Bitcast from a 'small element' src vector to a 'large element' vector: we
3336 // demand all the smaller source elements covered by the larger demanded
3337 // element of this vector.
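// e.g. for a v4i32 -> v2i64 bitcast, Scale = 2 and demanding i64 element 0
// demands i32 source elements 0 and 1.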
3338 if ((NumSrcElts % NumElts) == 0) {
3339 unsigned Scale = NumSrcElts / NumElts;
3340 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3341 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3342 TLO, Depth + 1))
3343 return true;
3344
3345 // If all the src elements covering an output element are zero/undef, then
3346 // the output element will be as well, assuming it was demanded.
3347 for (unsigned i = 0; i != NumElts; ++i) {
3348 if (DemandedElts[i]) {
3349 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3350 KnownZero.setBit(i);
3351 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3352 KnownUndef.setBit(i);
3353 }
3354 }
3355 }
3356 break;
3357 }
3358 case ISD::FREEZE: {
3359 SDValue N0 = Op.getOperand(0);
3360 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3361 /*PoisonOnly=*/false,
3362 Depth + 1))
3363 return TLO.CombineTo(Op, N0);
3364
3365 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3366 // freeze(op(x, ...)) -> op(freeze(x), ...).
3367 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3368 return TLO.CombineTo(
3369 Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Op), VT,
3370 TLO.DAG.getFreeze(N0.getOperand(0))));
3371 break;
3372 }
3373 case ISD::BUILD_VECTOR: {
3374 // Check all elements and simplify any unused elements with UNDEF.
3375 if (!DemandedElts.isAllOnes()) {
3376 // Don't simplify BROADCASTS.
3377 if (llvm::any_of(Op->op_values(),
3378 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3379 SmallVector<SDValue, 32> Ops(Op->ops());
3380 bool Updated = false;
3381 for (unsigned i = 0; i != NumElts; ++i) {
3382 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3383 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3384 KnownUndef.setBit(i);
3385 Updated = true;
3386 }
3387 }
3388 if (Updated)
3389 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3390 }
3391 }
3392 for (unsigned i = 0; i != NumElts; ++i) {
3393 SDValue SrcOp = Op.getOperand(i);
3394 if (SrcOp.isUndef()) {
3395 KnownUndef.setBit(i);
3396 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3397 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3398 KnownZero.setBit(i);
3399 }
3400 }
3401 break;
3402 }
3403 case ISD::CONCAT_VECTORS: {
3404 EVT SubVT = Op.getOperand(0).getValueType();
3405 unsigned NumSubVecs = Op.getNumOperands();
3406 unsigned NumSubElts = SubVT.getVectorNumElements();
3407 for (unsigned i = 0; i != NumSubVecs; ++i) {
3408 SDValue SubOp = Op.getOperand(i);
3409 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3410 APInt SubUndef, SubZero;
3411 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3412 Depth + 1))
3413 return true;
3414 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3415 KnownZero.insertBits(SubZero, i * NumSubElts);
3416 }
3417
3418 // Attempt to avoid multi-use ops if we don't need anything from them.
3419 if (!DemandedElts.isAllOnes()) {
3420 bool FoundNewSub = false;
3421 SmallVector<SDValue, 2> DemandedSubOps;
3422 for (unsigned i = 0; i != NumSubVecs; ++i) {
3423 SDValue SubOp = Op.getOperand(i);
3424 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3425 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3426 SubOp, SubElts, TLO.DAG, Depth + 1);
3427 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3428 FoundNewSub = NewSubOp ? true : FoundNewSub;
3429 }
3430 if (FoundNewSub) {
3431 SDValue NewOp =
3432 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3433 return TLO.CombineTo(Op, NewOp);
3434 }
3435 }
3436 break;
3437 }
3438 case ISD::INSERT_SUBVECTOR: {
3439 // Demand any elements from the subvector and the remainder from the src it
3440 // is inserted into.
3441 SDValue Src = Op.getOperand(0);
3442 SDValue Sub = Op.getOperand(1);
3443 uint64_t Idx = Op.getConstantOperandVal(2);
3444 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3445 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3446 APInt DemandedSrcElts = DemandedElts;
3447 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3448
3449 // If none of the sub operand elements are demanded, bypass the insert.
3450 if (!DemandedSubElts)
3451 return TLO.CombineTo(Op, Src);
3452
3453 APInt SubUndef, SubZero;
3454 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3455 Depth + 1))
3456 return true;
3457
3458 // If none of the src operand elements are demanded, replace it with undef.
3459 if (!DemandedSrcElts && !Src.isUndef())
3460 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3461 TLO.DAG.getUNDEF(VT), Sub,
3462 Op.getOperand(2)));
3463
3464 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3465 TLO, Depth + 1))
3466 return true;
3467 KnownUndef.insertBits(SubUndef, Idx);
3468 KnownZero.insertBits(SubZero, Idx);
3469
3470 // Attempt to avoid multi-use ops if we don't need anything from them.
3471 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3472 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3473 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3474 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3475 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3476 if (NewSrc || NewSub) {
3477 NewSrc = NewSrc ? NewSrc : Src;
3478 NewSub = NewSub ? NewSub : Sub;
3479 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3480 NewSub, Op.getOperand(2));
3481 return TLO.CombineTo(Op, NewOp);
3482 }
3483 }
3484 break;
3485 }
3486 case ISD::EXTRACT_SUBVECTOR: {
3487 // Offset the demanded elts by the subvector index.
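// e.g. extracting a v2i32 subvector at index 2 from a v8i32 source while
// demanding only result element 0 demands just source element 2.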
3488 SDValue Src = Op.getOperand(0);
3489 if (Src.getValueType().isScalableVector())
3490 break;
3491 uint64_t Idx = Op.getConstantOperandVal(1);
3492 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3493 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3494
3495 APInt SrcUndef, SrcZero;
3496 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3497 Depth + 1))
3498 return true;
3499 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3500 KnownZero = SrcZero.extractBits(NumElts, Idx);
3501
3502 // Attempt to avoid multi-use ops if we don't need anything from them.
3503 if (!DemandedElts.isAllOnes()) {
3504 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3505 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3506 if (NewSrc) {
3507 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3508 Op.getOperand(1));
3509 return TLO.CombineTo(Op, NewOp);
3510 }
3511 }
3512 break;
3513 }
3514 case ISD::INSERT_VECTOR_ELT: {
3515 SDValue Vec = Op.getOperand(0);
3516 SDValue Scl = Op.getOperand(1);
3517 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3518
3519 // For a legal, constant insertion index, if we don't need this insertion
3520 // then strip it, else remove it from the demanded elts.
3521 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3522 unsigned Idx = CIdx->getZExtValue();
3523 if (!DemandedElts[Idx])
3524 return TLO.CombineTo(Op, Vec);
3525
3526 APInt DemandedVecElts(DemandedElts);
3527 DemandedVecElts.clearBit(Idx);
3528 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3529 KnownZero, TLO, Depth + 1))
3530 return true;
3531
3532 KnownUndef.setBitVal(Idx, Scl.isUndef());
3533
3534 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3535 break;
3536 }
3537
3538 APInt VecUndef, VecZero;
3539 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3540 Depth + 1))
3541 return true;
3542 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3543 break;
3544 }
3545 case ISD::VSELECT: {
3546 SDValue Sel = Op.getOperand(0);
3547 SDValue LHS = Op.getOperand(1);
3548 SDValue RHS = Op.getOperand(2);
3549
3550 // Try to transform the select condition based on the current demanded
3551 // elements.
3552 APInt UndefSel, ZeroSel;
3553 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3554 Depth + 1))
3555 return true;
3556
3557 // See if we can simplify either vselect operand.
3558 APInt DemandedLHS(DemandedElts);
3559 APInt DemandedRHS(DemandedElts);
3560 APInt UndefLHS, ZeroLHS;
3561 APInt UndefRHS, ZeroRHS;
3562 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3563 Depth + 1))
3564 return true;
3565 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3566 Depth + 1))
3567 return true;
3568
3569 KnownUndef = UndefLHS & UndefRHS;
3570 KnownZero = ZeroLHS & ZeroRHS;
3571
3572 // If we know that the selected element is always zero, we don't need the
3573 // select value element.
3574 APInt DemandedSel = DemandedElts & ~KnownZero;
3575 if (DemandedSel != DemandedElts)
3576 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3577 Depth + 1))
3578 return true;
3579
3580 break;
3581 }
3582 case ISD::VECTOR_SHUFFLE: {
3583 SDValue LHS = Op.getOperand(0);
3584 SDValue RHS = Op.getOperand(1);
3585 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3586
3587 // Collect demanded elements from the shuffle operands.
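// Mask entries below NumElts select from LHS and the rest select from RHS,
// e.g. with NumElts == 4, mask entry 5 demands element 1 of RHS.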
3588 APInt DemandedLHS(NumElts, 0);
3589 APInt DemandedRHS(NumElts, 0);
3590 for (unsigned i = 0; i != NumElts; ++i) {
3591 int M = ShuffleMask[i];
3592 if (M < 0 || !DemandedElts[i])
3593 continue;
3594 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3595 if (M < (int)NumElts)
3596 DemandedLHS.setBit(M);
3597 else
3598 DemandedRHS.setBit(M - NumElts);
3599 }
3600
3601 // If either side isn't demanded, replace it by UNDEF. We handle this
3602 // explicitly here to also simplify in the case of multiple uses (in
3603 // contrast to the SimplifyDemandedVectorElts calls below).
3604 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3605 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3606 if (FoldLHS || FoldRHS) {
3607 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3608 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3609 SDValue NewOp =
3610 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3611 return TLO.CombineTo(Op, NewOp);
3612 }
3613
3614 // See if we can simplify either shuffle operand.
3615 APInt UndefLHS, ZeroLHS;
3616 APInt UndefRHS, ZeroRHS;
3617 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3618 Depth + 1))
3619 return true;
3620 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3621 Depth + 1))
3622 return true;
3623
3624 // Simplify mask using undef elements from LHS/RHS.
3625 bool Updated = false;
3626 bool IdentityLHS = true, IdentityRHS = true;
3627 SmallVector<int, 32> NewMask(ShuffleMask);
3628 for (unsigned i = 0; i != NumElts; ++i) {
3629 int &M = NewMask[i];
3630 if (M < 0)
3631 continue;
3632 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3633 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3634 Updated = true;
3635 M = -1;
3636 }
3637 IdentityLHS &= (M < 0) || (M == (int)i);
3638 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3639 }
3640
3641 // Update legal shuffle masks based on demanded elements, unless the mask
3642 // would reduce to Identity, which can cause premature removal of the shuffle.
3643 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3644 SDValue LegalShuffle =
3645 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3646 if (LegalShuffle)
3647 return TLO.CombineTo(Op, LegalShuffle);
3648 }
3649
3650 // Propagate undef/zero elements from LHS/RHS.
3651 for (unsigned i = 0; i != NumElts; ++i) {
3652 int M = ShuffleMask[i];
3653 if (M < 0) {
3654 KnownUndef.setBit(i);
3655 } else if (M < (int)NumElts) {
3656 if (UndefLHS[M])
3657 KnownUndef.setBit(i);
3658 if (ZeroLHS[M])
3659 KnownZero.setBit(i);
3660 } else {
3661 if (UndefRHS[M - NumElts])
3662 KnownUndef.setBit(i);
3663 if (ZeroRHS[M - NumElts])
3664 KnownZero.setBit(i);
3665 }
3666 }
3667 break;
3668 }
3669 case ISD::ANY_EXTEND_VECTOR_INREG:
3670 case ISD::SIGN_EXTEND_VECTOR_INREG:
3671 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3672 APInt SrcUndef, SrcZero;
3673 SDValue Src = Op.getOperand(0);
3674 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3675 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3676 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3677 Depth + 1))
3678 return true;
3679 KnownZero = SrcZero.zextOrTrunc(NumElts);
3680 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3681
3682 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3683 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3684 DemandedSrcElts == 1) {
3685 // aext - if we just need the bottom element then we can bitcast.
3686 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3687 }
3688
3689 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3690 // zext(undef) upper bits are guaranteed to be zero.
3691 if (DemandedElts.isSubsetOf(KnownUndef))
3692 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3693 KnownUndef.clearAllBits();
3694
3695 // zext - if we just need the bottom element then we can mask:
3696 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3697 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3698 Op->isOnlyUserOf(Src.getNode()) &&
3699 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3700 SDLoc DL(Op);
3701 EVT SrcVT = Src.getValueType();
3702 EVT SrcSVT = SrcVT.getScalarType();
3703 SmallVector<SDValue> MaskElts;
3704 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3705 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3706 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3707 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3708 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3709 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3710 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3711 }
3712 }
3713 }
3714 break;
3715 }
3716
3717 // TODO: There are more binop opcodes that could be handled here - MIN,
3718 // MAX, saturated math, etc.
3719 case ISD::ADD: {
3720 SDValue Op0 = Op.getOperand(0);
3721 SDValue Op1 = Op.getOperand(1);
3722 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3723 APInt UndefLHS, ZeroLHS;
3724 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3725 Depth + 1, /*AssumeSingleUse*/ true))
3726 return true;
3727 }
3728 [[fallthrough]];
3729 }
3730 case ISD::AVGCEILS:
3731 case ISD::AVGCEILU:
3732 case ISD::AVGFLOORS:
3733 case ISD::AVGFLOORU:
3734 case ISD::OR:
3735 case ISD::XOR:
3736 case ISD::SUB:
3737 case ISD::FADD:
3738 case ISD::FSUB:
3739 case ISD::FMUL:
3740 case ISD::FDIV:
3741 case ISD::FREM: {
3742 SDValue Op0 = Op.getOperand(0);
3743 SDValue Op1 = Op.getOperand(1);
3744
3745 APInt UndefRHS, ZeroRHS;
3746 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3747 Depth + 1))
3748 return true;
3749 APInt UndefLHS, ZeroLHS;
3750 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3751 Depth + 1))
3752 return true;
3753
3754 KnownZero = ZeroLHS & ZeroRHS;
3755 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3756
3757 // Attempt to avoid multi-use ops if we don't need anything from them.
3758 // TODO - use KnownUndef to relax the demandedelts?
3759 if (!DemandedElts.isAllOnes())
3760 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3761 return true;
3762 break;
3763 }
3764 case ISD::SHL:
3765 case ISD::SRL:
3766 case ISD::SRA:
3767 case ISD::ROTL:
3768 case ISD::ROTR: {
3769 SDValue Op0 = Op.getOperand(0);
3770 SDValue Op1 = Op.getOperand(1);
3771
3772 APInt UndefRHS, ZeroRHS;
3773 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3774 Depth + 1))
3775 return true;
3776 APInt UndefLHS, ZeroLHS;
3777 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3778 Depth + 1))
3779 return true;
3780
3781 KnownZero = ZeroLHS;
3782 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3783
3784 // Attempt to avoid multi-use ops if we don't need anything from them.
3785 // TODO - use KnownUndef to relax the demandedelts?
3786 if (!DemandedElts.isAllOnes())
3787 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3788 return true;
3789 break;
3790 }
3791 case ISD::MUL:
3792 case ISD::MULHU:
3793 case ISD::MULHS:
3794 case ISD::AND: {
3795 SDValue Op0 = Op.getOperand(0);
3796 SDValue Op1 = Op.getOperand(1);
3797
3798 APInt SrcUndef, SrcZero;
3799 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3800 Depth + 1))
3801 return true;
3802 // If we know that a demanded element was zero in Op1, we don't need to
3803 // demand it in Op0 - it's guaranteed to be zero.
3804 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3805 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3806 TLO, Depth + 1))
3807 return true;
3808
3809 KnownUndef &= DemandedElts0;
3810 KnownZero &= DemandedElts0;
3811
3812 // If every element pair has a zero/undef then just fold to zero.
3813 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3814 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3815 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3816 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3817
3818 // If either side has a zero element, then the result element is zero, even
3819 // if the other is an UNDEF.
3820 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3821 // and then handle 'and' nodes with the rest of the binop opcodes.
3822 KnownZero |= SrcZero;
3823 KnownUndef &= SrcUndef;
3824 KnownUndef &= ~KnownZero;
3825
3826 // Attempt to avoid multi-use ops if we don't need anything from them.
3827 if (!DemandedElts.isAllOnes())
3828 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3829 return true;
3830 break;
3831 }
3832 case ISD::TRUNCATE:
3833 case ISD::SIGN_EXTEND:
3834 case ISD::ZERO_EXTEND:
3835 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3836 KnownZero, TLO, Depth + 1))
3837 return true;
3838
3839 if (!DemandedElts.isAllOnes())
3840 if (SDValue NewOp = SimplifyMultipleUseDemandedVectorElts(
3841 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3842 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3843
3844 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3845 // zext(undef) upper bits are guaranteed to be zero.
3846 if (DemandedElts.isSubsetOf(KnownUndef))
3847 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3848 KnownUndef.clearAllBits();
3849 }
3850 break;
3851 case ISD::SINT_TO_FP:
3852 case ISD::UINT_TO_FP:
3853 case ISD::FP_TO_SINT:
3854 case ISD::FP_TO_UINT:
3855 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3856 KnownZero, TLO, Depth + 1))
3857 return true;
3858 // Don't fall through to generic undef -> undef handling.
3859 return false;
3860 default: {
3861 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3862 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3863 KnownZero, TLO, Depth))
3864 return true;
3865 } else {
3866 KnownBits Known;
3867 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3868 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3869 TLO, Depth, AssumeSingleUse))
3870 return true;
3871 }
3872 break;
3873 }
3874 }
3875 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3876
3877 // Constant fold all undef cases.
3878 // TODO: Handle zero cases as well.
3879 if (DemandedElts.isSubsetOf(KnownUndef))
3880 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3881
3882 return false;
3883}
3884
3885/// Determine which of the bits specified in Mask are known to be either zero or
3886/// one and return them in the Known.
3887 void TargetLowering::computeKnownBitsForTargetNode(SDValue Op,
3888 KnownBits &Known,
3889 const APInt &DemandedElts,
3890 const SelectionDAG &DAG,
3891 unsigned Depth) const {
3892 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3893 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3894 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3895 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3896 "Should use MaskedValueIsZero if you don't know whether Op"
3897 " is a target node!");
3898 Known.resetAll();
3899}
3900
3901 void TargetLowering::computeKnownBitsForTargetInstr(
3902 GISelValueTracking &Analysis, Register R, KnownBits &Known,
3903 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3904 unsigned Depth) const {
3905 Known.resetAll();
3906}
3907
3908 void TargetLowering::computeKnownFPClassForTargetInstr(
3909 GISelValueTracking &Analysis, Register R, KnownFPClass &Known,
3910 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3911 unsigned Depth) const {
3912 Known.resetAll();
3913}
3914
3915 void TargetLowering::computeKnownBitsForFrameIndex(
3916 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3917 // The low bits are known zero if the pointer is aligned.
3918 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3919}
3920
3921 Align TargetLowering::computeKnownAlignForTargetInstr(
3922 GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI,
3923 unsigned Depth) const {
3924 return Align(1);
3925 }
3926
3927/// This method can be implemented by targets that want to expose additional
3928/// information about sign bits to the DAG Combiner.
3929 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3930 const APInt &,
3931 const SelectionDAG &,
3932 unsigned Depth) const {
3933 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3934 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3935 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3936 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3937 "Should use ComputeNumSignBits if you don't know whether Op"
3938 " is a target node!");
3939 return 1;
3940}
3941
3942 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3943 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3944 const MachineRegisterInfo &MRI, unsigned Depth) const {
3945 return 1;
3946}
3947
3948 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3949 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3950 TargetLoweringOpt &TLO, unsigned Depth) const {
3951 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3952 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3953 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3954 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3955 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3956 " is a target node!");
3957 return false;
3958}
3959
3960 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3961 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3962 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3963 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3964 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3965 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3966 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3967 "Should use SimplifyDemandedBits if you don't know whether Op"
3968 " is a target node!");
3969 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3970 return false;
3971}
3972
3973 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3974 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3975 SelectionDAG &DAG, unsigned Depth) const {
3976 assert(
3977 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3978 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3979 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3980 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3981 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3982 " is a target node!");
3983 return SDValue();
3984}
3985
3986 SDValue
3987 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3988 SDValue N1, MutableArrayRef<int> Mask,
3989 SelectionDAG &DAG) const {
3990 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3991 if (!LegalMask) {
3992 std::swap(N0, N1);
3993 ShuffleVectorSDNode::commuteMask(Mask);
3994 LegalMask = isShuffleMaskLegal(Mask, VT);
3995 }
3996
3997 if (!LegalMask)
3998 return SDValue();
3999
4000 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4001}
4002
4003 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
4004 return nullptr;
4005}
4006
4007 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4008 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4009 bool PoisonOnly, unsigned Depth) const {
4010 assert(
4011 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4012 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4013 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4014 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4015 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4016 " is a target node!");
4017
4018 // If Op can't create undef/poison and none of its operands are undef/poison
4019 // then Op is never undef/poison.
4020 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4021 /*ConsiderFlags*/ true, Depth) &&
4022 all_of(Op->ops(), [&](SDValue V) {
4023 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4024 Depth + 1);
4025 });
4026}
4027
4028 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
4029 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4030 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4031 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4032 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4033 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4034 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4035 "Should use canCreateUndefOrPoison if you don't know whether Op"
4036 " is a target node!");
4037 // Be conservative and return true.
4038 return true;
4039}
4040
4041 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
4042 const APInt &DemandedElts,
4043 const SelectionDAG &DAG,
4044 bool SNaN,
4045 unsigned Depth) const {
4046 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4047 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4048 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4049 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4050 "Should use isKnownNeverNaN if you don't know whether Op"
4051 " is a target node!");
4052 return false;
4053}
4054
4055 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
4056 const APInt &DemandedElts,
4057 APInt &UndefElts,
4058 const SelectionDAG &DAG,
4059 unsigned Depth) const {
4060 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4061 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4062 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4063 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4064 "Should use isSplatValue if you don't know whether Op"
4065 " is a target node!");
4066 return false;
4067}
4068
4069// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4070// work with truncating build vectors and vectors with elements of less than
4071// 8 bits.
4072 bool TargetLowering::isConstTrueVal(SDValue N) const {
4073 if (!N)
4074 return false;
4075
4076 unsigned EltWidth;
4077 APInt CVal;
4078 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4079 /*AllowTruncation=*/true)) {
4080 CVal = CN->getAPIntValue();
4081 EltWidth = N.getValueType().getScalarSizeInBits();
4082 } else
4083 return false;
4084
4085 // If this is a truncating splat, truncate the splat value.
4086 // Otherwise, we may fail to match the expected values below.
4087 if (EltWidth < CVal.getBitWidth())
4088 CVal = CVal.trunc(EltWidth);
4089
4090 switch (getBooleanContents(N.getValueType())) {
4091 case UndefinedBooleanContent:
4092 return CVal[0];
4093 case ZeroOrOneBooleanContent:
4094 return CVal.isOne();
4095 case ZeroOrNegativeOneBooleanContent:
4096 return CVal.isAllOnes();
4097 }
4098
4099 llvm_unreachable("Invalid boolean contents");
4100}
4101
4102 bool TargetLowering::isConstFalseVal(SDValue N) const {
4103 if (!N)
4104 return false;
4105
4106 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
4107 if (!CN) {
4108 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
4109 if (!BV)
4110 return false;
4111
4112 // We're only interested in constant splats; we don't care about undef
4113 // elements when identifying boolean constants, and getConstantSplatNode
4114 // returns null if all ops are undef.
4115 CN = BV->getConstantSplatNode();
4116 if (!CN)
4117 return false;
4118 }
4119
4120 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4121 return !CN->getAPIntValue()[0];
4122
4123 return CN->isZero();
4124}
4125
4126 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
4127 bool SExt) const {
4128 if (VT == MVT::i1)
4129 return N->isOne();
4130
4131 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
4132 switch (Cnt) {
4133 case TargetLowering::ZeroOrOneBooleanContent:
4134 // An extended value of 1 is always true, unless its original type is i1,
4135 // in which case it will be sign extended to -1.
4136 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4137 case TargetLowering::UndefinedBooleanContent:
4138 case TargetLowering::ZeroOrNegativeOneBooleanContent:
4139 return N->isAllOnes() && SExt;
4140 }
4141 llvm_unreachable("Unexpected enumeration.");
4142}
4143
4144/// This helper function of SimplifySetCC tries to optimize the comparison when
4145/// either operand of the SetCC node is a bitwise-and instruction.
4146SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4147 ISD::CondCode Cond, const SDLoc &DL,
4148 DAGCombinerInfo &DCI) const {
4149 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4150 std::swap(N0, N1);
4151
4152 SelectionDAG &DAG = DCI.DAG;
4153 EVT OpVT = N0.getValueType();
4154 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4155 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4156 return SDValue();
4157
4158 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4159 // iff everything but LSB is known zero:
4160 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4161 getBooleanContents(OpVT.getScalarType()) ==
4162 TargetLowering::ZeroOrOneBooleanContent) {
4163 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4164 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4165 if (DAG.MaskedValueIsZero(N0, UpperBits))
4166 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4167 }
4168
4169 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4170 // test in a narrow type that we can truncate to with no cost. Examples:
4171 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4172 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4173 // TODO: This conservatively checks for type legality on the source and
4174 // destination types. That may inhibit optimizations, but it also
4175 // allows setcc->shift transforms that may be more beneficial.
4176 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4177 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4178 isTypeLegal(OpVT) && N0.hasOneUse()) {
4179 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4180 AndC->getAPIntValue().getActiveBits());
4181 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4182 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4183 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4184 return DAG.getSetCC(DL, VT, Trunc, Zero,
4185 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
4186 }
4187 }
4188
4189 // Match these patterns in any of their permutations:
4190 // (X & Y) == Y
4191 // (X & Y) != Y
4192 SDValue X, Y;
4193 if (N0.getOperand(0) == N1) {
4194 X = N0.getOperand(1);
4195 Y = N0.getOperand(0);
4196 } else if (N0.getOperand(1) == N1) {
4197 X = N0.getOperand(0);
4198 Y = N0.getOperand(1);
4199 } else {
4200 return SDValue();
4201 }
4202
4203 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4204 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4205 // it's liable to create an infinite loop.
4206 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4207 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4208 valueHasExactlyOneBitSet(Y, DAG)) {
4209 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4210 // Note that where Y is variable and is known to have at most one bit set
4211 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4212 // equivalent when Y == 0.
4213 assert(OpVT.isInteger());
4214 Cond = ISD::getSetCCInverse(Cond, OpVT);
4215 if (DCI.isBeforeLegalizeOps() ||
4216 isCondCodeLegal(Cond, N0.getSimpleValueType()))
4217 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4218 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4219 // If the target supports an 'and-not' or 'and-complement' logic operation,
4220 // try to use that to make a comparison operation more efficient.
4221 // But don't do this transform if the mask is a single bit because there are
4222 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4223 // 'rlwinm' on PPC).
4224
4225 // Bail out if the compare operand that we want to turn into a zero is
4226 // already a zero (otherwise, infinite loop).
4227 if (isNullConstant(Y))
4228 return SDValue();
4229
4230 // Transform this into: ~X & Y == 0.
4231 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4232 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4233 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4234 }
4235
4236 return SDValue();
4237}
4238
4239/// This helper function of SimplifySetCC tries to optimize the comparison when
4240/// either operand of the SetCC node is a bitwise-or instruction.
4241/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
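/// This is profitable on targets with an and-not instruction (see the
/// hasAndNotCompare() check below), e.g. BIC on AArch64 or ANDN on x86 with
/// BMI, since X & ~Y then needs no separate NOT.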
4242SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4243 ISD::CondCode Cond, const SDLoc &DL,
4244 DAGCombinerInfo &DCI) const {
4245 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4246 std::swap(N0, N1);
4247
4248 SelectionDAG &DAG = DCI.DAG;
4249 EVT OpVT = N0.getValueType();
4250 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4251 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4252 return SDValue();
4253
4254 // (X | Y) == Y
4255 // (X | Y) != Y
4256 SDValue X;
4257 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4258 // If the target supports an 'and-not' or 'and-complement' logic operation,
4259 // try to use that to make a comparison operation more efficient.
4260
4261 // Bail out if the compare operand that we want to turn into a zero is
4262 // already a zero (otherwise, infinite loop).
4263 if (isNullConstant(N1))
4264 return SDValue();
4265
4266 // Transform this into: X & ~Y ==/!= 0.
4267 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4268 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4269 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4270 }
4271
4272 return SDValue();
4273}
4274
4275/// There are multiple IR patterns that could be checking whether certain
4276/// truncation of a signed number would be lossy or not. The pattern which is
4277 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
4278/// We are looking for the following pattern: (KeptBits is a constant)
4279/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4280 /// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
4281 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
4282/// We will unfold it into the natural trunc+sext pattern:
4283/// ((%x << C) a>> C) dstcond %x
4284/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
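/// For example, with KeptBits == 8 on i16 (and so C == 8):
/// (add %x, 128) u< 256 --> ((%x << 8) a>> 8) == %x
/// i.e. "%x fits in 8 signed bits" is checked via an explicit
/// sign-extension round-trip.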
4285SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4286 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4287 const SDLoc &DL) const {
4288 // We must be comparing with a constant.
4289 ConstantSDNode *C1;
4290 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4291 return SDValue();
4292
4293 // N0 should be: add %x, (1 << (KeptBits-1))
4294 if (N0->getOpcode() != ISD::ADD)
4295 return SDValue();
4296
4297 // And we must be 'add'ing a constant.
4298 ConstantSDNode *C01;
4299 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4300 return SDValue();
4301
4302 SDValue X = N0->getOperand(0);
4303 EVT XVT = X.getValueType();
4304
4305 // Validate constants ...
4306
4307 APInt I1 = C1->getAPIntValue();
4308
4309 ISD::CondCode NewCond;
4310 if (Cond == ISD::CondCode::SETULT) {
4311 NewCond = ISD::CondCode::SETEQ;
4312 } else if (Cond == ISD::CondCode::SETULE) {
4313 NewCond = ISD::CondCode::SETEQ;
4314 // But need to 'canonicalize' the constant.
4315 I1 += 1;
4316 } else if (Cond == ISD::CondCode::SETUGT) {
4317 NewCond = ISD::CondCode::SETNE;
4318 // But need to 'canonicalize' the constant.
4319 I1 += 1;
4320 } else if (Cond == ISD::CondCode::SETUGE) {
4321 NewCond = ISD::CondCode::SETNE;
4322 } else
4323 return SDValue();
4324
4325 APInt I01 = C01->getAPIntValue();
4326
4327 auto checkConstants = [&I1, &I01]() -> bool {
4328 // Both of them must be powers of two, and the constant from the setcc must be bigger.
4329 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4330 };
4331
4332 if (checkConstants()) {
4333 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4334 } else {
4335 // What if we invert constants? (and the target predicate)
4336 I1.negate();
4337 I01.negate();
4338 assert(XVT.isInteger());
4339 NewCond = getSetCCInverse(NewCond, XVT);
4340 if (!checkConstants())
4341 return SDValue();
4342 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4343 }
4344
4345 // They are power-of-two, so which bit is set?
4346 const unsigned KeptBits = I1.logBase2();
4347 const unsigned KeptBitsMinusOne = I01.logBase2();
4348
4349 // Magic!
4350 if (KeptBits != (KeptBitsMinusOne + 1))
4351 return SDValue();
4352 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4353
4354 // We don't want to do this in every single case.
4355 SelectionDAG &DAG = DCI.DAG;
4356 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4357 return SDValue();
4358
4359 // Unfold into: sext_inreg(%x) cond %x
4360 // Where 'cond' will be either 'eq' or 'ne'.
4361 SDValue SExtInReg = DAG.getNode(
4362 ISD::SIGN_EXTEND_INREG, DL, XVT, X,
4363 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4364 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4365}
4366
4367// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
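// e.g. (X & (1 << Y)) != 0 --> ((X >> Y) & 1) != 0, trading a variable shift
// of the constant for a shift of X, when the target prefers that form (see
// shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd below).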
4368SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4369 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4370 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4372 "Should be a comparison with 0.");
4373 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4374 "Valid only for [in]equality comparisons.");
4375
4376 unsigned NewShiftOpcode;
4377 SDValue X, C, Y;
4378
4379 SelectionDAG &DAG = DCI.DAG;
4380
4381 // Look for '(C l>>/<< Y)'.
4382 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4383 // The shift should be one-use.
4384 if (!V.hasOneUse())
4385 return false;
4386 unsigned OldShiftOpcode = V.getOpcode();
4387 switch (OldShiftOpcode) {
4388 case ISD::SHL:
4389 NewShiftOpcode = ISD::SRL;
4390 break;
4391 case ISD::SRL:
4392 NewShiftOpcode = ISD::SHL;
4393 break;
4394 default:
4395 return false; // must be a logical shift.
4396 }
4397 // We should be shifting a constant.
4398 // FIXME: best to use isConstantOrConstantVector().
4399 C = V.getOperand(0);
4400 ConstantSDNode *CC =
4401 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4402 if (!CC)
4403 return false;
4404 Y = V.getOperand(1);
4405
4406 ConstantSDNode *XC =
4407 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4408 return shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4409 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4410 };
4411
4412 // The LHS of the comparison should be a one-use 'and'.
4413 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4414 return SDValue();
4415
4416 X = N0.getOperand(0);
4417 SDValue Mask = N0.getOperand(1);
4418
4419 // 'and' is commutative!
4420 if (!Match(Mask)) {
4421 std::swap(X, Mask);
4422 if (!Match(Mask))
4423 return SDValue();
4424 }
4425
4426 EVT VT = X.getValueType();
4427
4428 // Produce:
4429 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4430 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4431 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4432 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4433 return T2;
4434}
4435
4436/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4437/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4438/// handle the commuted versions of these patterns.
4439SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4440 ISD::CondCode Cond, const SDLoc &DL,
4441 DAGCombinerInfo &DCI) const {
4442 unsigned BOpcode = N0.getOpcode();
4443 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4444 "Unexpected binop");
4445 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4446
4447 // (X + Y) == X --> Y == 0
4448 // (X - Y) == X --> Y == 0
4449 // (X ^ Y) == X --> Y == 0
4450 SelectionDAG &DAG = DCI.DAG;
4451 EVT OpVT = N0.getValueType();
4452 SDValue X = N0.getOperand(0);
4453 SDValue Y = N0.getOperand(1);
4454 if (X == N1)
4455 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4456
4457 if (Y != N1)
4458 return SDValue();
4459
4460 // (X + Y) == Y --> X == 0
4461 // (X ^ Y) == Y --> X == 0
4462 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4463 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4464
4465 // The shift would not be valid if the operands are boolean (i1).
4466 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4467 return SDValue();
4468
4469 // (X - Y) == Y --> X == Y << 1
4470 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4471 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4472 if (!DCI.isCalledByLegalizer())
4473 DCI.AddToWorklist(YShl1.getNode());
4474 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4475}
4476
4477 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4478 SDValue N0, const APInt &C1,
4479 ISD::CondCode Cond, const SDLoc &dl,
4480 SelectionDAG &DAG) {
4481 // Look through truncs that don't change the value of a ctpop.
4482 // FIXME: Add vector support? Need to be careful with setcc result type below.
4483 SDValue CTPOP = N0;
4484 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4485 N0.getOperand(0).getOpcode() == ISD::CTPOP)
4486 CTPOP = N0.getOperand(0);
4487
4488 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4489 return SDValue();
4490
4491 EVT CTVT = CTPOP.getValueType();
4492 SDValue CTOp = CTPOP.getOperand(0);
4493
4494 // Expand a power-of-2-or-zero comparison based on ctpop:
4495 // (ctpop x) u< 2 -> (x & x-1) == 0
4496 // (ctpop x) u> 1 -> (x & x-1) != 0
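// Each x &= x - 1 step below clears the lowest set bit, so e.g. (ctpop x) u< 3
// becomes ((x & (x - 1)) & ((x & (x - 1)) - 1)) == 0 after two passes.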
4497 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4498 // Keep the CTPOP if it is a cheap vector op.
4499 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4500 return SDValue();
4501
4502 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4503 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4504 return SDValue();
4505 if (C1 == 0 && (Cond == ISD::SETULT))
4506 return SDValue(); // This is handled elsewhere.
4507
4508 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4509
4510 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4511 SDValue Result = CTOp;
4512 for (unsigned i = 0; i < Passes; i++) {
4513 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4514 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4515 }
4516 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4517 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4518 }
4519
4520 // Expand a power-of-2 comparison based on ctpop
4521 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4522 // Keep the CTPOP if it is cheap.
4523 if (TLI.isCtpopFast(CTVT))
4524 return SDValue();
4525
4526 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4527 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4528 assert(CTVT.isInteger());
4529 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4530
4531 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4532 // check before emitting a potentially unnecessary op.
4533 if (DAG.isKnownNeverZero(CTOp)) {
4534 // (ctpop x) == 1 --> (x & x-1) == 0
4535 // (ctpop x) != 1 --> (x & x-1) != 0
4536 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4537 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4538 return RHS;
4539 }
4540
4541 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4542 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4543 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4544 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4545 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4546 }
4547
4548 return SDValue();
4549}
4550
4551 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4552 ISD::CondCode Cond, const SDLoc &dl,
4553 SelectionDAG &DAG) {
4554 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4555 return SDValue();
4556
4557 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4558 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4559 return SDValue();
4560
4561 auto getRotateSource = [](SDValue X) {
4562 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4563 return X.getOperand(0);
4564 return SDValue();
4565 };
4566
4567 // Peek through a rotated value compared against 0 or -1:
4568 // (rot X, Y) == 0/-1 --> X == 0/-1
4569 // (rot X, Y) != 0/-1 --> X != 0/-1
4570 if (SDValue R = getRotateSource(N0))
4571 return DAG.getSetCC(dl, VT, R, N1, Cond);
4572
4573 // Peek through an 'or' of a rotated value compared against 0:
4574 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4575 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4576 //
4577 // TODO: Add the 'and' with -1 sibling.
4578 // TODO: Recurse through a series of 'or' ops to find the rotate.
4579 EVT OpVT = N0.getValueType();
4580 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4581 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4582 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4583 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4584 }
4585 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4586 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4587 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4588 }
4589 }
4590
4591 return SDValue();
4592}
4593
4594 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4595 ISD::CondCode Cond, const SDLoc &dl,
4596 SelectionDAG &DAG) {
4597 // If we are testing for all-bits-clear, we might be able to do that with
4598 // less shifting since bit-order does not matter.
4599 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4600 return SDValue();
4601
4602 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4603 if (!C1 || !C1->isZero())
4604 return SDValue();
4605
4606 if (!N0.hasOneUse() ||
4607 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4608 return SDValue();
4609
4610 unsigned BitWidth = N0.getScalarValueSizeInBits();
4611 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4612 if (!ShAmtC)
4613 return SDValue();
4614
4615 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4616 if (ShAmt == 0)
4617 return SDValue();
4618
4619 // Canonicalize fshr as fshl to reduce pattern-matching.
4620 if (N0.getOpcode() == ISD::FSHR)
4621 ShAmt = BitWidth - ShAmt;
4622
4623 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4624 SDValue X, Y;
4625 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4626 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4627 return false;
4628 if (Or.getOperand(0) == Other) {
4629 X = Or.getOperand(0);
4630 Y = Or.getOperand(1);
4631 return true;
4632 }
4633 if (Or.getOperand(1) == Other) {
4634 X = Or.getOperand(1);
4635 Y = Or.getOperand(0);
4636 return true;
4637 }
4638 return false;
4639 };
4640
4641 EVT OpVT = N0.getValueType();
4642 EVT ShAmtVT = N0.getOperand(2).getValueType();
4643 SDValue F0 = N0.getOperand(0);
4644 SDValue F1 = N0.getOperand(1);
4645 if (matchOr(F0, F1)) {
4646 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4647 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4648 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4649 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4650 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4651 }
4652 if (matchOr(F1, F0)) {
4653 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4654 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4655 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4656 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4657 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4658 }
4659
4660 return SDValue();
4661}
4662
4663/// Try to simplify a setcc built with the specified operands and cc. If it is
4664/// unable to simplify it, return a null SDValue.
4665 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4666 ISD::CondCode Cond, bool foldBooleans,
4667 DAGCombinerInfo &DCI,
4668 const SDLoc &dl) const {
4669 SelectionDAG &DAG = DCI.DAG;
4670 const DataLayout &Layout = DAG.getDataLayout();
4671 EVT OpVT = N0.getValueType();
4672 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4673
4674 // Constant fold or commute setcc.
4675 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4676 return Fold;
4677
4678 bool N0ConstOrSplat =
4679 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4680 bool N1ConstOrSplat =
4681 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4682
4683 // Canonicalize toward having the constant on the RHS.
4684 // TODO: Handle non-splat vector constants. All undef causes trouble.
4685 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4686 // infinite loop here when we encounter one.
4687 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4688 if (N0ConstOrSplat && !N1ConstOrSplat &&
4689 (DCI.isBeforeLegalizeOps() ||
4690 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4691 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4692
4693 // If we have a subtract with the same 2 non-constant operands as this setcc
4694 // -- but in reverse order -- then try to commute the operands of this setcc
4695 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4696 // instruction on some targets.
4697 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4698 (DCI.isBeforeLegalizeOps() ||
4699 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4700 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4701 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4702 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4703
4704 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4705 return V;
4706
4707 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4708 return V;
4709
4710 if (auto *N1C = isConstOrConstSplat(N1)) {
4711 const APInt &C1 = N1C->getAPIntValue();
4712
4713 // Optimize some CTPOP cases.
4714 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4715 return V;
4716
4717 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4718 // X * Y == 0 --> (X == 0) || (Y == 0)
4719 // X * Y != 0 --> (X != 0) && (Y != 0)
4720 // TODO: This bails out if minsize is set, but if the target doesn't have a
4721 // single instruction multiply for this type, it would likely be
4722 // smaller to decompose.
4723 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4724 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4725 (N0->getFlags().hasNoUnsignedWrap() ||
4726 N0->getFlags().hasNoSignedWrap()) &&
4727 !Attr.hasFnAttr(Attribute::MinSize)) {
4728 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4729 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4730 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4731 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4732 }
4733
4734 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4735 // equality comparison, then we're just comparing whether X itself is
4736 // zero.
4737 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4738 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4739 isPowerOf2_32(N0.getScalarValueSizeInBits())) {
4740 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4741 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4742 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4743 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4744 // (srl (ctlz x), 5) == 0 -> X != 0
4745 // (srl (ctlz x), 5) != 1 -> X != 0
4746 Cond = ISD::SETNE;
4747 } else {
4748 // (srl (ctlz x), 5) != 0 -> X == 0
4749 // (srl (ctlz x), 5) == 1 -> X == 0
4750 Cond = ISD::SETEQ;
4751 }
4752 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4753 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4754 Cond);
4755 }
4756 }
4757 }
4758 }
4759
4760 // setcc X, 0, setlt --> X (when X is all sign bits)
4761 // setcc X, 0, setne --> X (when X is all sign bits)
4762 //
4763 // When we know that X has 0 or -1 in each element (or scalar), this
4764 // comparison will produce X. This is only true when boolean contents are
4765 // represented via 0s and -1s.
4766 if (VT == OpVT &&
4767 // Check that the result of setcc is 0 and -1.
4768 getBooleanContents(OpVT) == ZeroOrNegativeOneBooleanContent &&
4769 // Match only for checks X < 0 and X != 0
4770 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4771 // The identity holds iff we know all sign bits for all lanes.
4772 DAG.ComputeNumSignBits(N0) == OpVT.getScalarSizeInBits())
4773 return N0;
4774
4775 // FIXME: Support vectors.
4776 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4777 const APInt &C1 = N1C->getAPIntValue();
4778
4779 // (zext x) == C --> x == (trunc C)
4780 // (sext x) == C --> x == (trunc C)
4781 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4782 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4783 unsigned MinBits = N0.getValueSizeInBits();
4784 SDValue PreExt;
4785 bool Signed = false;
4786 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4787 // ZExt
4788 MinBits = N0->getOperand(0).getValueSizeInBits();
4789 PreExt = N0->getOperand(0);
4790 } else if (N0->getOpcode() == ISD::AND) {
4791 // DAGCombine turns costly ZExts into ANDs
4792 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4793 if ((C->getAPIntValue()+1).isPowerOf2()) {
4794 MinBits = C->getAPIntValue().countr_one();
4795 PreExt = N0->getOperand(0);
4796 }
4797 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4798 // SExt
4799 MinBits = N0->getOperand(0).getValueSizeInBits();
4800 PreExt = N0->getOperand(0);
4801 Signed = true;
4802 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4803 // ZEXTLOAD / SEXTLOAD
4804 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4805 MinBits = LN0->getMemoryVT().getSizeInBits();
4806 PreExt = N0;
4807 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4808 Signed = true;
4809 MinBits = LN0->getMemoryVT().getSizeInBits();
4810 PreExt = N0;
4811 }
4812 }
4813
4814 // Figure out how many bits we need to preserve this constant.
4815 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4816
4817 // Make sure we're not losing bits from the constant.
4818 if (MinBits > 0 &&
4819 MinBits < C1.getBitWidth() &&
4820 MinBits >= ReqdBits) {
4821 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4822 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4823 // Will get folded away.
4824 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4825 if (MinBits == 1 && C1 == 1)
4826 // Invert the condition.
4827 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4828 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4829 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4830 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4831 }
4832
4833 // If truncating the setcc operands is not desirable, we can still
4834 // simplify the expression in some cases:
4835 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4836 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4837 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4838 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4839 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4840 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4841 SDValue TopSetCC = N0->getOperand(0);
4842 unsigned N0Opc = N0->getOpcode();
4843 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4844 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4845 TopSetCC.getOpcode() == ISD::SETCC &&
4846 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4847 (isConstFalseVal(N1) ||
4848 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4849
4850 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4851 (!N1C->isZero() && Cond == ISD::SETNE);
4852
4853 if (!Inverse)
4854 return TopSetCC;
4855
4856 ISD::CondCode InvCond = ISD::getSetCCInverse(
4857 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4858 TopSetCC.getOperand(0).getValueType());
4859 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4860 TopSetCC.getOperand(1),
4861 InvCond);
4862 }
4863 }
4864 }
4865
4866 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4867 // equality or unsigned, and all 1 bits of the const are in the same
4868 // partial word, see if we can shorten the load.
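// For example, on a little-endian target,
// ((i32 (load p)) & 0xFF00) == 0 can be tested with a single i8 load
// from p+1 and an 0xFF mask.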
4869 if (DCI.isBeforeLegalize() &&
4870 !ISD::isSignedIntSetCC(Cond) &&
4871 N0.getOpcode() == ISD::AND && C1 == 0 &&
4872 N0.getNode()->hasOneUse() &&
4873 isa<LoadSDNode>(N0.getOperand(0)) &&
4874 N0.getOperand(0).getNode()->hasOneUse() &&
4875 isa<ConstantSDNode>(N0.getOperand(1))) {
4876 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4877 APInt bestMask;
4878 unsigned bestWidth = 0, bestOffset = 0;
4879 if (Lod->isSimple() && Lod->isUnindexed() &&
4880 (Lod->getMemoryVT().isByteSized() ||
4881 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4882 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4883 unsigned origWidth = N0.getValueSizeInBits();
4884 unsigned maskWidth = origWidth;
4885 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4886 // 8 bits, but have to be careful...
4887 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4888 origWidth = Lod->getMemoryVT().getSizeInBits();
4889 const APInt &Mask = N0.getConstantOperandAPInt(1);
4890 // Only consider power-of-2 widths (and at least one byte) as candidates
4891 // for the narrowed load.
4892 for (unsigned width = 8; width < origWidth; width *= 2) {
4893 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4894 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4895 // Avoid accessing any padding here for now (we could use memWidth
4896 // instead of origWidth here otherwise).
4897 unsigned maxOffset = origWidth - width;
4898 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4899 if (Mask.isSubsetOf(newMask)) {
4900 unsigned ptrOffset =
4901 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4902 unsigned IsFast = 0;
4903 assert((ptrOffset % 8) == 0 && "Non-byte-aligned pointer offset");
4904 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4905 if (shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT,
4906 ptrOffset / 8) &&
4907 allowsMemoryAccess(
4908 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4909 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4910 IsFast) {
4911 bestOffset = ptrOffset / 8;
4912 bestMask = Mask.lshr(offset);
4913 bestWidth = width;
4914 break;
4915 }
4916 }
4917 newMask <<= 8;
4918 }
4919 if (bestWidth)
4920 break;
4921 }
4922 }
4923 if (bestWidth) {
4924 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4925 SDValue Ptr = Lod->getBasePtr();
4926 if (bestOffset != 0)
4927 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4928 SDValue NewLoad =
4929 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4930 Lod->getPointerInfo().getWithOffset(bestOffset),
4931 Lod->getBaseAlign());
4932 SDValue And =
4933 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4934 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4935 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4936 }
4937 }
4938
4939 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4940 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4941 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4942
4943 // If the comparison constant has bits in the upper part, the
4944 // zero-extended value could never match.
4945 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4946 C1.getBitWidth() - InSize))) {
4947 switch (Cond) {
4948 case ISD::SETUGT:
4949 case ISD::SETUGE:
4950 case ISD::SETEQ:
4951 return DAG.getConstant(0, dl, VT);
4952 case ISD::SETULT:
4953 case ISD::SETULE:
4954 case ISD::SETNE:
4955 return DAG.getConstant(1, dl, VT);
4956 case ISD::SETGT:
4957 case ISD::SETGE:
4958 // True if the sign bit of C1 is set.
4959 return DAG.getConstant(C1.isNegative(), dl, VT);
4960 case ISD::SETLT:
4961 case ISD::SETLE:
4962 // True if the sign bit of C1 isn't set.
4963 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4964 default:
4965 break;
4966 }
4967 }
4968
4969 // Otherwise, we can perform the comparison with the low bits.
4970 switch (Cond) {
4971 case ISD::SETEQ:
4972 case ISD::SETNE:
4973 case ISD::SETUGT:
4974 case ISD::SETUGE:
4975 case ISD::SETULT:
4976 case ISD::SETULE: {
4977 EVT newVT = N0.getOperand(0).getValueType();
4978 // FIXME: Should use isNarrowingProfitable.
4979 if (DCI.isBeforeLegalizeOps() ||
4980 (isOperationLegal(ISD::SETCC, newVT) &&
4981 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
4982 isTypeDesirableForOp(ISD::SETCC, newVT))) {
4983 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4984 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4985
4986 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4987 NewConst, Cond);
4988 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4989 }
4990 break;
4991 }
4993 default: break; // TODO: Be more careful with signed comparisons.
4993 break; // todo, be more careful with signed comparisons
4994 }
4995 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4996 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4997 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4998 OpVT)) {
4999 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
5000 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5001 EVT ExtDstTy = N0.getValueType();
5002 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5003
5004 // If the constant doesn't fit into the number of bits for the source of
5005 // the sign extension, it is impossible for both sides to be equal.
5006 if (C1.getSignificantBits() > ExtSrcTyBits)
5007 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
5008
5009 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5010 ExtDstTy != ExtSrcTy && "Unexpected types!");
5011 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5012 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5013 DAG.getConstant(Imm, dl, ExtDstTy));
5014 if (!DCI.isCalledByLegalizer())
5015 DCI.AddToWorklist(ZextOp.getNode());
5016 // Otherwise, make this a use of a zext.
5017 return DAG.getSetCC(dl, VT, ZextOp,
5018 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5019 } else if ((N1C->isZero() || N1C->isOne()) &&
5020 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5021 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5022 // excluded as they are handled below whilst checking for foldBooleans.
5023 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5024 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5025 (N0.getValueType() == MVT::i1 ||
5026 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5027 DAG.MaskedValueIsZero(
5028 N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
5029 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5030 if (TrueWhenTrue)
5031 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5032 // Invert the condition.
5033 if (N0.getOpcode() == ISD::SETCC) {
5034 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5035 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
5036 if (DCI.isBeforeLegalizeOps() ||
5037 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
5038 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5039 }
5040 }
5041
5042 if ((N0.getOpcode() == ISD::XOR ||
5043 (N0.getOpcode() == ISD::AND &&
5044 N0.getOperand(0).getOpcode() == ISD::XOR &&
5045 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5046 isOneConstant(N0.getOperand(1))) {
5047 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5048 // can only do this if the top bits are known zero.
5049 unsigned BitWidth = N0.getValueSizeInBits();
5050 if (DAG.MaskedValueIsZero(N0,
5051 APInt::getHighBitsSet(BitWidth,
5052 BitWidth-1))) {
5053 // Okay, get the un-inverted input value.
5054 SDValue Val;
5055 if (N0.getOpcode() == ISD::XOR) {
5056 Val = N0.getOperand(0);
5057 } else {
5058 assert(N0.getOpcode() == ISD::AND &&
5059 N0.getOperand(0).getOpcode() == ISD::XOR);
5060 // ((X^1)&1)^1 -> X & 1
5061 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5062 N0.getOperand(0).getOperand(0),
5063 N0.getOperand(1));
5064 }
5065
5066 return DAG.getSetCC(dl, VT, Val, N1,
5067 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5068 }
5069 } else if (N1C->isOne()) {
5070 SDValue Op0 = N0;
5071 if (Op0.getOpcode() == ISD::TRUNCATE)
5072 Op0 = Op0.getOperand(0);
5073
5074 if ((Op0.getOpcode() == ISD::XOR) &&
5075 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5076 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5077 SDValue XorLHS = Op0.getOperand(0);
5078 SDValue XorRHS = Op0.getOperand(1);
5079 // Ensure that the input setccs return an i1 type or 0/1 value.
5080 if (Op0.getValueType() == MVT::i1 ||
5081 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
5082 ZeroOrOneBooleanContent &&
5083 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
5084 ZeroOrOneBooleanContent)) {
5085 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5086 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
5087 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5088 }
5089 }
5090 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5091 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5092 if (Op0.getValueType().bitsGT(VT))
5093 Op0 = DAG.getNode(ISD::AND, dl, VT,
5094 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5095 DAG.getConstant(1, dl, VT));
5096 else if (Op0.getValueType().bitsLT(VT))
5097 Op0 = DAG.getNode(ISD::AND, dl, VT,
5098 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5099 DAG.getConstant(1, dl, VT));
5100
5101 return DAG.getSetCC(dl, VT, Op0,
5102 DAG.getConstant(0, dl, Op0.getValueType()),
5103 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5104 }
5105 if (Op0.getOpcode() == ISD::AssertZext &&
5106 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5107 return DAG.getSetCC(dl, VT, Op0,
5108 DAG.getConstant(0, dl, Op0.getValueType()),
5109 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
5110 }
5111 }
5112
5113 // Given:
5114 // icmp eq/ne (urem %x, %y), 0
5115 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5116 // icmp eq/ne %x, 0
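// (A nonzero %x with at most one bit set is a power of 2, whose divisors
// all have a single bit set; a %y with two or more bits set can never
// divide it evenly, so the remainder is zero exactly when %x is zero.)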
5117 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5118 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5119 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5120 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5121 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5122 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5123 }
5124
5125 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5126 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5127 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5128 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
5129 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5130 N1C->isAllOnes()) {
5131 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5132 DAG.getConstant(0, dl, OpVT),
5133 Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
5134 }
5135
5136 // fold (setcc (trunc x) c) -> (setcc x c)
5137 if (N0.getOpcode() == ISD::TRUNCATE &&
5138 ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) ||
5139 (N0->getFlags().hasNoSignedWrap() &&
5140 !ISD::isUnsignedIntSetCC(Cond))) &&
5141 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5142 EVT NewVT = N0.getOperand(0).getValueType();
5143 SDValue NewConst = DAG.getConstant(
5144 N0->getFlags().hasNoSignedWrap()
5145 ? C1.sext(NewVT.getSizeInBits())
5146 : C1.zext(NewVT.getSizeInBits()),
5147 dl, NewVT);
5148 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5149 }
5150
5151 if (SDValue V =
5152 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5153 return V;
5154 }
5155
5156 // These simplifications apply to splat vectors as well.
5157 // TODO: Handle more splat vector cases.
5158 if (auto *N1C = isConstOrConstSplat(N1)) {
5159 const APInt &C1 = N1C->getAPIntValue();
5160
5161 APInt MinVal, MaxVal;
5162 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5163 if (ISD::isSignedIntSetCC(Cond)) {
5164 MinVal = APInt::getSignedMinValue(OperandBitSize);
5165 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5166 } else {
5167 MinVal = APInt::getMinValue(OperandBitSize);
5168 MaxVal = APInt::getMaxValue(OperandBitSize);
5169 }
5170
5171 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5172 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5173 // X >= MIN --> true
5174 if (C1 == MinVal)
5175 return DAG.getBoolConstant(true, dl, VT, OpVT);
5176
5177 if (!VT.isVector()) { // TODO: Support this for vectors.
5178 // X >= C0 --> X > (C0 - 1)
5179 APInt C = C1 - 1;
5180 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
5181 if ((DCI.isBeforeLegalizeOps() ||
5182 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5183 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5184 isLegalICmpImmediate(C.getSExtValue())))) {
5185 return DAG.getSetCC(dl, VT, N0,
5186 DAG.getConstant(C, dl, N1.getValueType()),
5187 NewCC);
5188 }
5189 }
5190 }
5191
5192 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5193 // X <= MAX --> true
5194 if (C1 == MaxVal)
5195 return DAG.getBoolConstant(true, dl, VT, OpVT);
5196
5197 // X <= C0 --> X < (C0 + 1)
5198 if (!VT.isVector()) { // TODO: Support this for vectors.
5199 APInt C = C1 + 1;
5200 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
5201 if ((DCI.isBeforeLegalizeOps() ||
5202 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5203 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5204 isLegalICmpImmediate(C.getSExtValue())))) {
5205 return DAG.getSetCC(dl, VT, N0,
5206 DAG.getConstant(C, dl, N1.getValueType()),
5207 NewCC);
5208 }
5209 }
5210 }
5211
5212 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5213 if (C1 == MinVal)
5214 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5215
5216 // TODO: Support this for vectors after legalize ops.
5217 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5218 // Canonicalize setlt X, Max --> setne X, Max
5219 if (C1 == MaxVal)
5220 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5221
5222 // If we have setult X, 1, turn it into seteq X, 0
5223 if (C1 == MinVal+1)
5224 return DAG.getSetCC(dl, VT, N0,
5225 DAG.getConstant(MinVal, dl, N0.getValueType()),
5226 ISD::SETEQ);
5227 }
5228 }
5229
5230 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5231 if (C1 == MaxVal)
5232 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5233
5234 // TODO: Support this for vectors after legalize ops.
5235 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5236 // Canonicalize setgt X, Min --> setne X, Min
5237 if (C1 == MinVal)
5238 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5239
5240 // If we have setugt X, Max-1, turn it into seteq X, Max
5241 if (C1 == MaxVal-1)
5242 return DAG.getSetCC(dl, VT, N0,
5243 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5244 ISD::SETEQ);
5245 }
5246 }
5247
5248 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5249 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5250 if (C1.isZero())
5251 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5252 VT, N0, N1, Cond, DCI, dl))
5253 return CC;
5254
5255 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5256 // For example, when high 32-bits of i64 X are known clear:
5257 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5258 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5259 bool CmpZero = N1C->isZero();
5260 bool CmpNegOne = N1C->isAllOnes();
5261 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5262 // Match or(lo,shl(hi,bw/2)) pattern.
5263 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5264 unsigned EltBits = V.getScalarValueSizeInBits();
5265 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5266 return false;
5267 SDValue LHS = V.getOperand(0);
5268 SDValue RHS = V.getOperand(1);
5269 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5270 // The unshifted element must have zero upper bits.
5271 if (RHS.getOpcode() == ISD::SHL &&
5272 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5273 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5274 DAG.MaskedValueIsZero(LHS, HiBits)) {
5275 Lo = LHS;
5276 Hi = RHS.getOperand(0);
5277 return true;
5278 }
5279 if (LHS.getOpcode() == ISD::SHL &&
5280 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5281 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5282 DAG.MaskedValueIsZero(RHS, HiBits)) {
5283 Lo = RHS;
5284 Hi = LHS.getOperand(0);
5285 return true;
5286 }
5287 return false;
5288 };
5289
5290 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5291 unsigned EltBits = N0.getScalarValueSizeInBits();
5292 unsigned HalfBits = EltBits / 2;
5293 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5294 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5295 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5296 SDValue NewN0 =
5297 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5298 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5299 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5300 };
5301
5302 SDValue Lo, Hi;
5303 if (IsConcat(N0, Lo, Hi))
5304 return MergeConcat(Lo, Hi);
5305
5306 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5307 SDValue Lo0, Lo1, Hi0, Hi1;
5308 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5309 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5310 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5311 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5312 }
5313 }
5314 }
5315 }
5316
5317 // If we have "setcc X, C0", check to see if we can shrink the immediate
5318 // by changing cc.
5319 // TODO: Support this for vectors after legalize ops.
5320 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5321 // SETUGT X, SINTMAX -> SETLT X, 0
5322 // SETUGE X, SINTMIN -> SETLT X, 0
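// (X u> SINTMAX and X u>= SINTMIN both say the sign bit of X is set,
// i.e. X is negative when reinterpreted as signed.)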
5323 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5324 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5325 return DAG.getSetCC(dl, VT, N0,
5326 DAG.getConstant(0, dl, N1.getValueType()),
5327 ISD::SETLT);
5328
5329 // SETULT X, SINTMIN -> SETGT X, -1
5330 // SETULE X, SINTMAX -> SETGT X, -1
5331 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5332 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5333 return DAG.getSetCC(dl, VT, N0,
5334 DAG.getAllOnesConstant(dl, N1.getValueType()),
5335 ISD::SETGT);
5336 }
5337 }
5338
5339 // Back to non-vector simplifications.
5340 // TODO: Can we do these for vector splats?
5341 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5342 const APInt &C1 = N1C->getAPIntValue();
5343 EVT ShValTy = N0.getValueType();
5344
5345 // Fold bit comparisons when we can. This will result in an
5346 // incorrect value when boolean false is negative one, unless
5347 // the bitsize is 1 in which case the false value is the same
5348 // in practice regardless of the representation.
5349 if ((VT.getSizeInBits() == 1 ||
5350 getBooleanContents(N0.getValueType()) == ZeroOrNegativeOneBooleanContent) &&
5351 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5352 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5353 N0.getOpcode() == ISD::AND) {
5354 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5355 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5356 // Perform the xform if the AND RHS is a single bit.
5357 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5358 if (AndRHS->getAPIntValue().isPowerOf2() &&
5359 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5360 return DAG.getNode(
5361 ISD::TRUNCATE, dl, VT,
5362 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5363 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5364 }
5365 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5366 // (X & 8) == 8 --> (X & 8) >> 3
5367 // Perform the xform if C1 is a single bit.
5368 unsigned ShCt = C1.logBase2();
5369 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5370 return DAG.getNode(
5371 ISD::TRUNCATE, dl, VT,
5372 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5373 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5374 }
5375 }
5376 }
5377 }
5378
5379 if (C1.getSignificantBits() <= 64 &&
5380 !isLegalICmpImmediate(C1.getSExtValue())) {
5381 // (X & -256) == 256 -> (X >> 8) == 1
5382 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5383 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5384 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5385 const APInt &AndRHSC = AndRHS->getAPIntValue();
5386 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5387 unsigned ShiftBits = AndRHSC.countr_zero();
5388 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5389 // If using an unsigned shift doesn't yield a legal compare
5390 // immediate, try using sra instead.
5391 APInt NewC = C1.lshr(ShiftBits);
5392 if (NewC.getSignificantBits() <= 64 &&
5393 !isLegalICmpImmediate(NewC.getSExtValue())) {
5394 APInt SignedC = C1.ashr(ShiftBits);
5395 if (SignedC.getSignificantBits() <= 64 &&
5396 isLegalICmpImmediate(SignedC.getSExtValue())) {
5397 SDValue Shift = DAG.getNode(
5398 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5399 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5400 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5401 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5402 }
5403 }
5404 SDValue Shift = DAG.getNode(
5405 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5406 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5407 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5408 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5409 }
5410 }
5411 }
5412 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5413 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5414 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5415 // X < 0x100000000 -> (X >> 32) < 1
5416 // X >= 0x100000000 -> (X >> 32) >= 1
5417 // X <= 0x0ffffffff -> (X >> 32) < 1
5418 // X > 0x0ffffffff -> (X >> 32) >= 1
5419 unsigned ShiftBits;
5420 APInt NewC = C1;
5421 ISD::CondCode NewCond = Cond;
5422 if (AdjOne) {
5423 ShiftBits = C1.countr_one();
5424 NewC = NewC + 1;
5425 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5426 } else {
5427 ShiftBits = C1.countr_zero();
5428 }
5429 NewC.lshrInPlace(ShiftBits);
5430 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5431 isLegalICmpImmediate(NewC.getSExtValue()) &&
5432 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5433 SDValue Shift =
5434 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5435 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5436 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5437 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5438 }
5439 }
5440 }
5441 }
5442
5443 if (isa<ConstantFPSDNode>(N1)) {
5444 auto *CFP = cast<ConstantFPSDNode>(N1);
5445 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5446
5447 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5448 // constant if knowing that the operand is non-nan is enough. We prefer to
5449 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5450 // materialize 0.0.
5451 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5452 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5453
5454 // setcc (fneg x), C -> setcc swap(pred) x, -C
5455 if (N0.getOpcode() == ISD::FNEG) {
5456 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5457 if (DCI.isBeforeLegalizeOps() ||
5458 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5459 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5460 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5461 }
5462 }
5463
5464 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5465 if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5466 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5467 bool IsFabs = N0.getOpcode() == ISD::FABS;
5468 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5469 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5470 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5471 : (IsFabs ? fcInf : fcPosInf);
5472 if (Cond == ISD::SETUEQ)
5473 Flag |= fcNan;
5474 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5475 DAG.getTargetConstant(Flag, dl, MVT::i32));
5476 }
5477 }
5478
5479 // If the condition is not legal, see if we can find an equivalent one
5480 // which is legal.
5481 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5482 // If the comparison was an awkward floating-point == or != and one of
5483 // the comparison operands is infinity or negative infinity, convert the
5484 // condition to a less-awkward <= or >=.
5485 if (CFP->getValueAPF().isInfinity()) {
5486 bool IsNegInf = CFP->getValueAPF().isNegative();
5487 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5488 switch (Cond) {
5489 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5490 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5491 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5492 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5493 default: break;
5494 }
5495 if (NewCond != ISD::SETCC_INVALID &&
5496 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5497 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5498 }
5499 }
5500 }
5501
5502 if (N0 == N1) {
5503 // The sext(setcc()) => setcc() optimization relies on the appropriate
5504 // constant being emitted.
5505 assert(!N0.getValueType().isInteger() &&
5506 "Integer types should be handled by FoldSetCC");
5507
5508 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5509 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5510 if (UOF == 2) // FP operators that are undefined on NaNs.
5511 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5512 if (UOF == unsigned(EqTrue))
5513 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5514 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5515 // if it is not already.
5516 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5517 if (NewCond != Cond &&
5518 (DCI.isBeforeLegalizeOps() ||
5519 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5520 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5521 }
5522
5523 // ~X > ~Y --> Y > X
5524 // ~X < ~Y --> Y < X
5525 // ~X < C --> X > ~C
5526 // ~X > C --> X < ~C
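// (These hold because ~X == -1 - X, which reverses both the signed and
// the unsigned ordering.)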
5527 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5528 N0.getValueType().isInteger()) {
5529 if (isBitwiseNot(N0)) {
5530 if (isBitwiseNot(N1))
5531 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5532
5533 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5534 !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5535 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5536 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5537 }
5538 }
5539 }
5540
5541 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5542 N0.getValueType().isInteger()) {
5543 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5544 N0.getOpcode() == ISD::XOR) {
5545 // Simplify (X+Y) == (X+Z) --> Y == Z
5546 if (N0.getOpcode() == N1.getOpcode()) {
5547 if (N0.getOperand(0) == N1.getOperand(0))
5548 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5549 if (N0.getOperand(1) == N1.getOperand(1))
5550 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5551 if (isCommutativeBinOp(N0.getOpcode())) {
5552 // If X op Y == Y op X, try other combinations.
5553 if (N0.getOperand(0) == N1.getOperand(1))
5554 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5555 Cond);
5556 if (N0.getOperand(1) == N1.getOperand(0))
5557 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5558 Cond);
5559 }
5560 }
5561
5562 // If RHS is a legal immediate value for a compare instruction, we need
5563 // to be careful about increasing register pressure needlessly.
5564 bool LegalRHSImm = false;
5565
5566 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5567 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5568 // Turn (X+C1) == C2 --> X == C2-C1
5569 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5570 return DAG.getSetCC(
5571 dl, VT, N0.getOperand(0),
5572 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5573 dl, N0.getValueType()),
5574 Cond);
5575
5576 // Turn (X^C1) == C2 --> X == C1^C2
5577 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5578 return DAG.getSetCC(
5579 dl, VT, N0.getOperand(0),
5580 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5581 dl, N0.getValueType()),
5582 Cond);
5583 }
5584
5585 // Turn (C1-X) == C2 --> X == C1-C2
5586 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5587 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5588 return DAG.getSetCC(
5589 dl, VT, N0.getOperand(1),
5590 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5591 dl, N0.getValueType()),
5592 Cond);
5593
5594 // Could RHSC fold directly into a compare?
5595 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5596 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5597 }
5598
5599 // (X+Y) == X --> Y == 0 and similar folds.
5600 // Don't do this if X is an immediate that can fold into a cmp
5601 // instruction and X+Y has other uses. It could be an induction variable
5602 // chain, and the transform would increase register pressure.
5603 if (!LegalRHSImm || N0.hasOneUse())
5604 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5605 return V;
5606 }
5607
5608 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5609 N1.getOpcode() == ISD::XOR)
5610 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5611 return V;
5612
5613 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5614 return V;
5615
5616 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5617 return V;
5618 }
5619
5620 // Fold remainder of division by a constant.
5621 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5622 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5623 // When division is cheap or optimizing for minimum size,
5624 // fall through to DIVREM creation by skipping this fold.
5625 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5626 if (N0.getOpcode() == ISD::UREM) {
5627 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5628 return Folded;
5629 } else if (N0.getOpcode() == ISD::SREM) {
5630 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5631 return Folded;
5632 }
5633 }
5634 }
5635
5636 // Fold away ALL boolean setcc's.
5637 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5638 SDValue Temp;
5639 switch (Cond) {
5640 default: llvm_unreachable("Unknown integer setcc!");
5641 case ISD::SETEQ: // X == Y -> ~(X^Y)
5642 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5643 N0 = DAG.getNOT(dl, Temp, OpVT);
5644 if (!DCI.isCalledByLegalizer())
5645 DCI.AddToWorklist(Temp.getNode());
5646 break;
5647 case ISD::SETNE: // X != Y --> (X^Y)
5648 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5649 break;
5650 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5651 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5652 Temp = DAG.getNOT(dl, N0, OpVT);
5653 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5654 if (!DCI.isCalledByLegalizer())
5655 DCI.AddToWorklist(Temp.getNode());
5656 break;
5657 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5658 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5659 Temp = DAG.getNOT(dl, N1, OpVT);
5660 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5661 if (!DCI.isCalledByLegalizer())
5662 DCI.AddToWorklist(Temp.getNode());
5663 break;
5664 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5665 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5666 Temp = DAG.getNOT(dl, N0, OpVT);
5667 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5668 if (!DCI.isCalledByLegalizer())
5669 DCI.AddToWorklist(Temp.getNode());
5670 break;
5671 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5672 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5673 Temp = DAG.getNOT(dl, N1, OpVT);
5674 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5675 break;
5676 }
5677 if (VT.getScalarType() != MVT::i1) {
5678 if (!DCI.isCalledByLegalizer())
5679 DCI.AddToWorklist(N0.getNode());
5680 // FIXME: If running after legalize, we probably can't do this.
5681 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5682 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5683 }
5684 return N0;
5685 }
5686
5687 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5688 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5689 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5690 ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() &&
5691 N1->getFlags().hasNoUnsignedWrap()) ||
5692 (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() &&
5693 N1->getFlags().hasNoSignedWrap())) &&
5694 isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) {
5695 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5696 }
5697
5698 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5699 // TODO: Remove that .isVector() check
5700 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5701 N0->getFlags().hasNoSignedWrap() && ISD::isSignedIntSetCC(Cond)) {
5702 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5703 }
5704
5705 // Could not fold it.
5706 return SDValue();
5707}
5708
5709/// Returns true (and the GlobalValue and the offset) if the node is a
5710/// GlobalAddress + offset.
5711bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5712 int64_t &Offset) const {
5713
5714 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5715
5716 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5717 GA = GASD->getGlobal();
5718 Offset += GASD->getOffset();
5719 return true;
5720 }
5721
5722 if (N->isAnyAdd()) {
5723 SDValue N1 = N->getOperand(0);
5724 SDValue N2 = N->getOperand(1);
5725 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5726 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5727 Offset += V->getSExtValue();
5728 return true;
5729 }
5730 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5731 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5732 Offset += V->getSExtValue();
5733 return true;
5734 }
5735 }
5736 }
5737
5738 return false;
5739}
5740
5741SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5742 DAGCombinerInfo &DCI) const {
5743 // Default implementation: no optimization.
5744 return SDValue();
5745}
5746
5747//===----------------------------------------------------------------------===//
5748// Inline Assembler Implementation Methods
5749//===----------------------------------------------------------------------===//
5750
5751TargetLowering::ConstraintType
5752TargetLowering::getConstraintType(StringRef Constraint) const {
5753 unsigned S = Constraint.size();
5754
5755 if (S == 1) {
5756 switch (Constraint[0]) {
5757 default: break;
5758 case 'r':
5759 return C_RegisterClass;
5760 case 'm': // memory
5761 case 'o': // offsetable
5762 case 'V': // not offsetable
5763 return C_Memory;
5764 case 'p': // Address.
5765 return C_Address;
5766 case 'n': // Simple Integer
5767 case 'E': // Floating Point Constant
5768 case 'F': // Floating Point Constant
5769 return C_Immediate;
5770 case 'i': // Simple Integer or Relocatable Constant
5771 case 's': // Relocatable Constant
5772 case 'X': // Allow ANY value.
5773 case 'I': // Target registers.
5774 case 'J':
5775 case 'K':
5776 case 'L':
5777 case 'M':
5778 case 'N':
5779 case 'O':
5780 case 'P':
5781 case '<':
5782 case '>':
5783 return C_Other;
5784 }
5785 }
5786
5787 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5788 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5789 return C_Memory;
5790 return C_Register;
5791 }
5792 return C_Unknown;
5793}
5794
5795/// Try to replace an X constraint, which matches anything, with another that
5796/// has more specific requirements based on the type of the corresponding
5797/// operand.
5798const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5799 if (ConstraintVT.isInteger())
5800 return "r";
5801 if (ConstraintVT.isFloatingPoint())
5802 return "f"; // works for many targets
5803 return nullptr;
5804}
5805
5806SDValue TargetLowering::LowerAsmOutputForConstraint(
5807 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5808 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5809 return SDValue();
5810}
5811
5812/// Lower the specified operand into the Ops vector.
5813/// If it is invalid, don't add anything to Ops.
5814void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5815 StringRef Constraint,
5816 std::vector<SDValue> &Ops,
5817 SelectionDAG &DAG) const {
5818
5819 if (Constraint.size() > 1)
5820 return;
5821
5822 char ConstraintLetter = Constraint[0];
5823 switch (ConstraintLetter) {
5824 default: break;
5825 case 'X': // Allows any operand
5826 case 'i': // Simple Integer or Relocatable Constant
5827 case 'n': // Simple Integer
5828 case 's': { // Relocatable Constant
5829
5830 ConstantSDNode *C;
5831 uint64_t Offset = 0;
5832
5833 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5834 // etc., since getelementptr is variadic. We can't use
5835 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5836 // while in this case the GA may be furthest from the root node which is
5837 // likely an ISD::ADD.
5838 while (true) {
5839 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5840 // gcc prints these as sign extended. Sign extend value to 64 bits
5841 // now; without this it would get ZExt'd later in
5842 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5843 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5844 BooleanContent BCont = getBooleanContents(MVT::i64);
5845 ISD::NodeType ExtOpc =
5846 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5847 int64_t ExtVal =
5848 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5849 Ops.push_back(
5850 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5851 return;
5852 }
5853 if (ConstraintLetter != 'n') {
5854 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5855 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5856 GA->getValueType(0),
5857 Offset + GA->getOffset()));
5858 return;
5859 }
5860 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5861 Ops.push_back(DAG.getTargetBlockAddress(
5862 BA->getBlockAddress(), BA->getValueType(0),
5863 Offset + BA->getOffset(), BA->getTargetFlags()));
5864 return;
5865 }
5866 if (isa<BasicBlockSDNode>(Op)) {
5867 Ops.push_back(Op);
5868 return;
5869 }
5870 }
5871 const unsigned OpCode = Op.getOpcode();
5872 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5873 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5874 Op = Op.getOperand(1);
5875 // Subtraction is not commutative.
5876 else if (OpCode == ISD::ADD &&
5877 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5878 Op = Op.getOperand(0);
5879 else
5880 return;
5881 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5882 continue;
5883 }
5884 return;
5885 }
5886 break;
5887 }
5888 }
5889}
5890
5891void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
5892 SmallVectorImpl<SDValue> &Ops,
5893 SelectionDAG &DAG) const {}
5894
5895std::pair<unsigned, const TargetRegisterClass *>
5896TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5897 StringRef Constraint,
5898 MVT VT) const {
5899 if (!Constraint.starts_with("{"))
5900 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5901 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5902
5903 // Remove the braces from around the name.
5904 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5905
5906 std::pair<unsigned, const TargetRegisterClass *> R =
5907 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5908
5909 // Figure out which register class contains this reg.
5910 for (const TargetRegisterClass *RC : RI->regclasses()) {
5911 // If none of the value types for this register class are valid, we
5912 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5913 if (!isLegalRC(*RI, *RC))
5914 continue;
5915
5916 for (const MCPhysReg &PR : *RC) {
5917 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5918 std::pair<unsigned, const TargetRegisterClass *> S =
5919 std::make_pair(PR, RC);
5920
5921 // If this register class has the requested value type, return it,
5922 // otherwise keep searching and return the first class found
5923 // if no other is found which explicitly has the requested type.
5924 if (RI->isTypeLegalForClass(*RC, VT))
5925 return S;
5926 if (!R.second)
5927 R = S;
5928 }
5929 }
5930 }
5931
5932 return R;
5933}
5934
5935//===----------------------------------------------------------------------===//
5936// Constraint Selection.
5937
5938/// Return true if this is an input operand that is a matching constraint like
5939/// "4".
5940bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5941 assert(!ConstraintCode.empty() && "No known constraint!");
5942 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5943}
5944
5945/// If this is an input matching constraint, this method returns the output
5946/// operand it matches.
5947unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5948 assert(!ConstraintCode.empty() && "No known constraint!");
5949 return atoi(ConstraintCode.c_str());
5950}
5951
5952/// Split up the constraint string from the inline assembly value into the
5953/// specific constraints and their prefixes, and also tie in the associated
5954/// operand values.
5955/// If this returns an empty vector, and if the constraint string itself
5956/// isn't empty, there was an error parsing.
5957TargetLowering::AsmOperandInfoVector
5958TargetLowering::ParseConstraints(const DataLayout &DL,
5959 const TargetRegisterInfo *TRI,
5960 const CallBase &Call) const {
5961 /// Information about all of the constraints.
5962 AsmOperandInfoVector ConstraintOperands;
5963 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5964 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5965
5966 // Do a prepass over the constraints, canonicalizing them, and building up the
5967 // ConstraintOperands list.
5968 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5969 unsigned ResNo = 0; // ResNo - The result number of the next output.
5970 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5971
5972 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5973 ConstraintOperands.emplace_back(std::move(CI));
5974 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5975
5976 // Update multiple alternative constraint count.
5977 if (OpInfo.multipleAlternatives.size() > maCount)
5978 maCount = OpInfo.multipleAlternatives.size();
5979
5980 OpInfo.ConstraintVT = MVT::Other;
5981
5982 // Compute the value type for each operand.
5983 switch (OpInfo.Type) {
5984 case InlineAsm::isOutput: {
5985 // Indirect outputs just consume an argument.
5986 if (OpInfo.isIndirect) {
5987 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5988 break;
5989 }
5990
5991 // The return value of the call is this value. As such, there is no
5992 // corresponding argument.
5993 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5994 EVT VT;
5995 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5996 VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
5997 } else {
5998 assert(ResNo == 0 && "Asm only has one result!");
5999 VT = getAsmOperandValueType(DL, Call.getType());
6000 }
6001 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6002 ++ResNo;
6003 break;
6004 }
6005 case InlineAsm::isInput:
6006 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6007 break;
6008 case InlineAsm::isLabel:
6009 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
6010 ++LabelNo;
6011 continue;
6012 case InlineAsm::isClobber:
6013 // Nothing to do.
6014 break;
6015 }
6016
6017 if (OpInfo.CallOperandVal) {
6018 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6019 if (OpInfo.isIndirect) {
6020 OpTy = Call.getParamElementType(ArgNo);
6021 assert(OpTy && "Indirect operand must have elementtype attribute");
6022 }
6023
6024 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6025 if (StructType *STy = dyn_cast<StructType>(OpTy))
6026 if (STy->getNumElements() == 1)
6027 OpTy = STy->getElementType(0);
6028
6029 // If OpTy is not a single value, it may be a struct/union that we
6030 // can tile with integers.
6031 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6032 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6033 switch (BitSize) {
6034 default: break;
6035 case 1:
6036 case 8:
6037 case 16:
6038 case 32:
6039 case 64:
6040 case 128:
6041 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6042 break;
6043 }
6044 }
6045
6046 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6047 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6048 ArgNo++;
6049 }
6050 }
6051
6052 // If we have multiple alternative constraints, select the best alternative.
6053 if (!ConstraintOperands.empty()) {
6054 if (maCount) {
6055 unsigned bestMAIndex = 0;
6056 int bestWeight = -1;
6057 // weight: -1 = invalid match; 0 (so-so match) up to 5 (good match).
6058 int weight = -1;
6059 unsigned maIndex;
6060 // Compute the sums of the weights for each alternative, keeping track
6061 // of the best (highest weight) one so far.
6062 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6063 int weightSum = 0;
6064 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6065 cIndex != eIndex; ++cIndex) {
6066 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6067 if (OpInfo.Type == InlineAsm::isClobber)
6068 continue;
6069
6070 // If this is an output operand with a matching input operand,
6071 // look up the matching input. If their types mismatch, e.g. one
6072 // is an integer, the other is floating point, or their sizes are
6073 // different, flag it as an maCantMatch.
6074 if (OpInfo.hasMatchingInput()) {
6075 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6076 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6077 if ((OpInfo.ConstraintVT.isInteger() !=
6078 Input.ConstraintVT.isInteger()) ||
6079 (OpInfo.ConstraintVT.getSizeInBits() !=
6080 Input.ConstraintVT.getSizeInBits())) {
6081 weightSum = -1; // Can't match.
6082 break;
6083 }
6084 }
6085 }
6086 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6087 if (weight == -1) {
6088 weightSum = -1;
6089 break;
6090 }
6091 weightSum += weight;
6092 }
6093 // Update best.
6094 if (weightSum > bestWeight) {
6095 bestWeight = weightSum;
6096 bestMAIndex = maIndex;
6097 }
6098 }
6099
6100 // Now select chosen alternative in each constraint.
6101 for (AsmOperandInfo &cInfo : ConstraintOperands)
6102 if (cInfo.Type != InlineAsm::isClobber)
6103 cInfo.selectAlternative(bestMAIndex);
6104 }
6105 }
6106
6107 // Check and hook up tied operands, choose constraint code to use.
6108 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6109 cIndex != eIndex; ++cIndex) {
6110 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6111
6112 // If this is an output operand with a matching input operand, look up the
6113 // matching input. If their types mismatch, e.g. one is an integer, the
6114 // other is floating point, or their sizes are different, flag it as an
6115 // error.
6116 if (OpInfo.hasMatchingInput()) {
6117 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6118
6119 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6120 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6121 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6122 OpInfo.ConstraintVT);
6123 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6124 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6125 Input.ConstraintVT);
6126 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6127 OpInfo.ConstraintVT.isFloatingPoint();
6128 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6129 Input.ConstraintVT.isFloatingPoint();
6130 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6131 (MatchRC.second != InputRC.second)) {
6132 report_fatal_error("Unsupported asm: input constraint"
6133 " with a matching output constraint of"
6134 " incompatible type!");
6135 }
6136 }
6137 }
6138 }
6139
6140 return ConstraintOperands;
6141}
6142
6143/// Return a number indicating our preference for choosing a type of constraint
6144/// over another, for the purpose of sorting them. Immediates are almost always
6145/// preferable (when they can be emitted). A higher return value means a
6146/// stronger preference for one constraint type relative to another.
6147/// FIXME: We should prefer registers over memory but doing so may lead to
6148/// unrecoverable register exhaustion later.
6149/// https://github.com/llvm/llvm-project/issues/20571
6150static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
6151 switch (CT) {
6152 case TargetLowering::C_Immediate:
6153 case TargetLowering::C_Other:
6154 return 4;
6155 case TargetLowering::C_Memory:
6156 case TargetLowering::C_Address:
6157 return 3;
6158 case TargetLowering::C_RegisterClass:
6159 return 2;
6160 case TargetLowering::C_Register:
6161 return 1;
6162 case TargetLowering::C_Unknown:
6163 return 0;
6164 }
6165 llvm_unreachable("Invalid constraint type");
6166}
6167
6168/// Examine constraint type and operand type and determine a weight value.
6169/// This object must already have been set up with the operand type
6170/// and the current alternative constraint selected.
6171TargetLowering::ConstraintWeight
6172TargetLowering::getMultipleConstraintMatchWeight(
6173 AsmOperandInfo &info, int maIndex) const {
6174 InlineAsm::ConstraintCodeVector *rCodes;
6175 if (maIndex >= (int)info.multipleAlternatives.size())
6176 rCodes = &info.Codes;
6177 else
6178 rCodes = &info.multipleAlternatives[maIndex].Codes;
6179 ConstraintWeight BestWeight = CW_Invalid;
6180
6181 // Loop over the options, keeping track of the most general one.
6182 for (const std::string &rCode : *rCodes) {
6183 ConstraintWeight weight =
6184 getSingleConstraintMatchWeight(info, rCode.c_str());
6185 if (weight > BestWeight)
6186 BestWeight = weight;
6187 }
6188
6189 return BestWeight;
6190}
6191
6192/// Examine constraint type and operand type and determine a weight value.
6193/// This object must already have been set up with the operand type
6194/// and the current alternative constraint selected.
6195TargetLowering::ConstraintWeight
6196TargetLowering::getSingleConstraintMatchWeight(
6197 AsmOperandInfo &info, const char *constraint) const {
6198 ConstraintWeight weight = CW_Invalid;
6199 Value *CallOperandVal = info.CallOperandVal;
6200 // If we don't have a value, we can't do a match,
6201 // but allow it at the lowest weight.
6202 if (!CallOperandVal)
6203 return CW_Default;
6204 // Look at the constraint type.
6205 switch (*constraint) {
6206 case 'i': // immediate integer.
6207 case 'n': // immediate integer with a known value.
6208 if (isa<ConstantInt>(CallOperandVal))
6209 weight = CW_Constant;
6210 break;
6211 case 's': // non-explicit integral immediate.
6212 if (isa<GlobalValue>(CallOperandVal))
6213 weight = CW_Constant;
6214 break;
6215 case 'E': // immediate float if host format.
6216 case 'F': // immediate float.
6217 if (isa<ConstantFP>(CallOperandVal))
6218 weight = CW_Constant;
6219 break;
6220 case '<': // memory operand with autodecrement.
6221 case '>': // memory operand with autoincrement.
6222 case 'm': // memory operand.
6223 case 'o': // offsettable memory operand
6224 case 'V': // non-offsettable memory operand
6225 weight = CW_Memory;
6226 break;
6227 case 'r': // general register.
6228 case 'g': // general register, memory operand or immediate integer.
6229 // note: Clang converts "g" to "imr".
6230 if (CallOperandVal->getType()->isIntegerTy())
6231 weight = CW_Register;
6232 break;
6233 case 'X': // any operand.
6234 default:
6235 weight = CW_Default;
6236 break;
6237 }
6238 return weight;
6239}
6240
6241/// If there are multiple different constraints that we could pick for this
6242/// operand (e.g. "imr") try to pick the 'best' one.
6243/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6244/// into seven classes:
6245/// Register -> one specific register
6246/// RegisterClass -> a group of regs
6247/// Memory -> memory
6248/// Address -> a symbolic memory reference
6249/// Immediate -> immediate values
6250/// Other -> magic values (such as "Flag Output Operands")
6251/// Unknown -> something we don't recognize yet and can't handle
6252/// Ideally, we would pick the most specific constraint possible: if we have
6253/// something that fits into a register, we would pick it. The problem here
6254/// is that if we have something that could either be in a register or in
6255/// memory, choosing the register could cause selection of *other*
6256/// operands to fail: they might only succeed if we pick memory. Because of
6257/// this the heuristic we use is:
6258///
6259/// 1) If there is an 'other' constraint, and if the operand is valid for
6260/// that constraint, use it. This makes us take advantage of 'i'
6261/// constraints when available.
6262/// 2) Otherwise, pick the most general constraint present. This prefers
6263/// 'm' over 'r', for example.
6264///
6265TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
6266 TargetLowering::AsmOperandInfo &OpInfo) const {
6267 ConstraintGroup Ret;
6268
6269 Ret.reserve(OpInfo.Codes.size());
6270 for (StringRef Code : OpInfo.Codes) {
6271 TargetLowering::ConstraintType CType = getConstraintType(Code);
6272
6273 // Indirect 'other' or 'immediate' constraints are not allowed.
6274 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6275 CType == TargetLowering::C_Register ||
6276 CType == TargetLowering::C_RegisterClass))
6277 continue;
6278
6279 // Things with matching constraints can only be registers, per gcc
6280 // documentation. This mainly affects "g" constraints.
6281 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6282 continue;
6283
6284 Ret.emplace_back(Code, CType);
6285 }
6286
6287 llvm::stable_sort(Ret, [](ConstraintPair a, ConstraintPair b) {
6288 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6289 });
6290
6291 return Ret;
6292}
6293
6294/// If we have an immediate, see if we can lower it. Return true if we can,
6295/// false otherwise.
6296static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
6297 SDValue Op, SelectionDAG *DAG,
6298 const TargetLowering &TLI) {
6299
6300 assert((P.second == TargetLowering::C_Other ||
6301 P.second == TargetLowering::C_Immediate) &&
6302 "need immediate or other");
6303
6304 if (!Op.getNode())
6305 return false;
6306
6307 std::vector<SDValue> ResultOps;
6308 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6309 return !ResultOps.empty();
6310}
6311
6312/// Determines the constraint code and constraint type to use for the specific
6313/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6314void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
6315 SDValue Op,
6316 SelectionDAG *DAG) const {
6317 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6318
6319 // Single-letter constraints ('r') are very common.
6320 if (OpInfo.Codes.size() == 1) {
6321 OpInfo.ConstraintCode = OpInfo.Codes[0];
6322 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6323 } else {
6324 ConstraintGroup G = getConstraintPreferences(OpInfo);
6325 if (G.empty())
6326 return;
6327
6328 unsigned BestIdx = 0;
6329 for (const unsigned E = G.size();
6330 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6331 G[BestIdx].second == TargetLowering::C_Immediate);
6332 ++BestIdx) {
6333 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6334 break;
6335 // If we're out of constraints, just pick the first one.
6336 if (BestIdx + 1 == E) {
6337 BestIdx = 0;
6338 break;
6339 }
6340 }
6341
6342 OpInfo.ConstraintCode = G[BestIdx].first;
6343 OpInfo.ConstraintType = G[BestIdx].second;
6344 }
6345
6346 // 'X' matches anything.
6347 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6348 // Constants are handled elsewhere. For Functions, the type here is the
6349 // type of the result, which is not what we want to look at; leave them
6350 // alone.
6351 Value *v = OpInfo.CallOperandVal;
6352 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6353 return;
6354 }
6355
6356 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6357 OpInfo.ConstraintCode = "i";
6358 return;
6359 }
6360
6361 // Otherwise, try to resolve it to something we know about by looking at
6362 // the actual operand type.
6363 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6364 OpInfo.ConstraintCode = Repl;
6365 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6366 }
6367 }
6368}
6369
6370/// Given an exact SDIV by a constant, create a multiplication
6371/// with the multiplicative inverse of the constant.
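/// For example, an exact sdiv of an i32 by 6 becomes (sra x, 1) followed by
/// a multiply by 0xAAAAAAAB, the multiplicative inverse of 3 modulo 2^32.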
6372/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6373static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6374 const SDLoc &dl, SelectionDAG &DAG,
6375 SmallVectorImpl<SDNode *> &Created) {
6376 SDValue Op0 = N->getOperand(0);
6377 SDValue Op1 = N->getOperand(1);
6378 EVT VT = N->getValueType(0);
6379 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6380 EVT ShSVT = ShVT.getScalarType();
6381
6382 bool UseSRA = false;
6383 SmallVector<SDValue, 16> Shifts, Factors;
6384
6385 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6386 if (C->isZero())
6387 return false;
6388
6389 EVT CT = C->getValueType(0);
6390 APInt Divisor = C->getAPIntValue();
6391 unsigned Shift = Divisor.countr_zero();
6392 if (Shift) {
6393 Divisor.ashrInPlace(Shift);
6394 UseSRA = true;
6395 }
6396 APInt Factor = Divisor.multiplicativeInverse();
6397 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6398 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6399 return true;
6400 };
6401
6402 // Collect all magic values from the build vector.
6403 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false,
6404 /*AllowTruncation=*/true))
6405 return SDValue();
6406
6407 SDValue Shift, Factor;
6408 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6409 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6410 Factor = DAG.getBuildVector(VT, dl, Factors);
6411 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6412 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6413 "Expected matchUnaryPredicate to return one element for scalable "
6414 "vectors");
6415 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6416 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6417 } else {
6418 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6419 Shift = Shifts[0];
6420 Factor = Factors[0];
6421 }
6422
6423 SDValue Res = Op0;
6424 if (UseSRA) {
6425 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6426 Created.push_back(Res.getNode());
6427 }
6428
6429 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6430}
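
// Worked example (illustrative): an exact 'sdiv i32 %x, 6' becomes
//   %t = ashr exact i32 %x, 1      ; strip the power-of-two factor 2
//   %q = mul i32 %t, -1431655765   ; 0xAAAAAAAB == inv(3) mod 2^32
// e.g. for x = 18: t = 9 and 9 * 0xAAAAAAAB (mod 2^32) = 3 == 18 / 6.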
6431
6432/// Given an exact UDIV by a constant, create a multiplication
6433/// with the multiplicative inverse of the constant.
6434/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6435static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N,
6436 const SDLoc &dl, SelectionDAG &DAG,
6437 SmallVectorImpl<SDNode *> &Created) {
6438 EVT VT = N->getValueType(0);
6439 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6440 EVT ShSVT = ShVT.getScalarType();
6441
6442 bool UseSRL = false;
6443 SmallVector<SDValue, 16> Shifts, Factors;
6444
6445 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6446 if (C->isZero())
6447 return false;
6448
6449 EVT CT = C->getValueType(0);
6450 APInt Divisor = C->getAPIntValue();
6451 unsigned Shift = Divisor.countr_zero();
6452 if (Shift) {
6453 Divisor.lshrInPlace(Shift);
6454 UseSRL = true;
6455 }
6456 // Calculate the multiplicative inverse modulo 2^BW.
6457 APInt Factor = Divisor.multiplicativeInverse();
6458 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6459 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6460 return true;
6461 };
6462
6463 SDValue Op1 = N->getOperand(1);
6464
6465 // Collect all magic values from the build vector.
6466 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern, /*AllowUndefs=*/false,
6467 /*AllowTruncation=*/true))
6468 return SDValue();
6469
6470 SDValue Shift, Factor;
6471 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6472 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6473 Factor = DAG.getBuildVector(VT, dl, Factors);
6474 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6475 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6476 "Expected matchUnaryPredicate to return one element for scalable "
6477 "vectors");
6478 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6479 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6480 } else {
6481 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6482 Shift = Shifts[0];
6483 Factor = Factors[0];
6484 }
6485
6486 SDValue Res = N->getOperand(0);
6487 if (UseSRL) {
6488 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6489 Created.push_back(Res.getNode());
6490 }
6491
6492 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6493}
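
// Worked example (illustrative): an exact 'udiv i32 %x, 10' becomes
//   %t = lshr exact i32 %x, 1      ; 10 == 5 * 2^1
//   %q = mul i32 %t, -858993459    ; 0xCCCCCCCD == inv(5) mod 2^32
// e.g. for x = 30: t = 15 and 15 * 0xCCCCCCCD (mod 2^32) = 3 == 30 / 10.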
6494
6495SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6496 SelectionDAG &DAG,
6497 SmallVectorImpl<SDNode *> &Created) const {
6498 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6499 if (isIntDivCheap(N->getValueType(0), Attr))
6500 return SDValue(N, 0); // Lower SDIV as SDIV
6501 return SDValue();
6502}
6503
6504SDValue
6505TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6506 SelectionDAG &DAG,
6507 SmallVectorImpl<SDNode *> &Created) const {
6508 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6509 if (isIntDivCheap(N->getValueType(0), Attr))
6510 return SDValue(N, 0); // Lower SREM as SREM
6511 return SDValue();
6512}
6513
6514/// Build sdiv by power-of-2 with conditional move instructions
6515/// Ref: "Hacker's Delight" by Henry Warren 10-1
6516/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6517/// bgez x, label
6518/// add x, x, 2**k-1
6519/// label:
6520/// sra res, x, k
6521/// neg res, res (when the divisor is negative)
6522SDValue TargetLowering::buildSDIVPow2WithCMov(
6523 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6524 SmallVectorImpl<SDNode *> &Created) const {
6525 unsigned Lg2 = Divisor.countr_zero();
6526 EVT VT = N->getValueType(0);
6527
6528 SDLoc DL(N);
6529 SDValue N0 = N->getOperand(0);
6530 SDValue Zero = DAG.getConstant(0, DL, VT);
6531 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6532 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6533
6534 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6535 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6536 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6537 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6538 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6539
6540 Created.push_back(Cmp.getNode());
6541 Created.push_back(Add.getNode());
6542 Created.push_back(CMov.getNode());
6543
6544 // Divide by pow2.
6545 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6546 DAG.getShiftAmountConstant(Lg2, VT, DL));
6547
6548 // If we're dividing by a positive value, we're done. Otherwise, we must
6549 // negate the result.
6550 if (Divisor.isNonNegative())
6551 return SRA;
6552
6553 Created.push_back(SRA.getNode());
6554 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6555}
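
// The conditional add of (2^k - 1) is what makes the shift round toward zero
// instead of toward negative infinity. E.g. for 'sdiv i32 %x, 8' with
// x = -9, a plain 'ashr -9, 3' yields -2 (the floor), while the sequence
// above computes (-9 + 7) >> 3 == -1, the correct truncated quotient.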
6556
6557/// Given an ISD::SDIV node expressing a divide by constant,
6558/// return a DAG expression to select that will generate the same value by
6559/// multiplying by a magic number.
6560/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6561SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6562 bool IsAfterLegalization,
6563 bool IsAfterLegalTypes,
6564 SmallVectorImpl<SDNode *> &Created) const {
6565 SDLoc dl(N);
6566 EVT VT = N->getValueType(0);
6567 EVT SVT = VT.getScalarType();
6568 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6569 EVT ShSVT = ShVT.getScalarType();
6570 unsigned EltBits = VT.getScalarSizeInBits();
6571 EVT MulVT;
6572
6573 // Check to see if we can do this.
6574 // FIXME: We should be more aggressive here.
6575 if (!isTypeLegal(VT)) {
6576 // Limit this to simple scalars for now.
6577 if (VT.isVector() || !VT.isSimple())
6578 return SDValue();
6579
6580 // If this type will be promoted to a large enough type with a legal
6581 // multiply operation, we can go ahead and do this transform.
6582 if (getTypeAction(*DAG.getContext(), VT) != TypePromoteInteger)
6583 return SDValue();
6584
6585 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6586 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6587 !isOperationLegal(ISD::MUL, MulVT))
6588 return SDValue();
6589 }
6590
6591 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6592 if (N->getFlags().hasExact())
6593 return BuildExactSDIV(*this, N, dl, DAG, Created);
6594
6595 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6596
6597 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6598 if (C->isZero())
6599 return false;
6600 // Truncate the divisor to the target scalar type in case it was promoted
6601 // during type legalization.
6602 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6603 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6604 int NumeratorFactor = 0;
6605 int ShiftMask = -1;
6606
6607 if (Divisor.isOne() || Divisor.isAllOnes()) {
6608 // If d is +1/-1, we just multiply the numerator by +1/-1.
6609 NumeratorFactor = Divisor.getSExtValue();
6610 magics.Magic = 0;
6611 magics.ShiftAmount = 0;
6612 ShiftMask = 0;
6613 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6614 // If d > 0 and m < 0, add the numerator.
6615 NumeratorFactor = 1;
6616 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6617 // If d < 0 and m > 0, subtract the numerator.
6618 NumeratorFactor = -1;
6619 }
6620
6621 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6622 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6623 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6624 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6625 return true;
6626 };
6627
6628 SDValue N0 = N->getOperand(0);
6629 SDValue N1 = N->getOperand(1);
6630
6631 // Collect the shifts / magic values from each element.
6632 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6633 /*AllowTruncation=*/true))
6634 return SDValue();
6635
6636 SDValue MagicFactor, Factor, Shift, ShiftMask;
6637 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6638 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6639 Factor = DAG.getBuildVector(VT, dl, Factors);
6640 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6641 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6642 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6643 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6644 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6645 "Expected matchUnaryPredicate to return one element for scalable "
6646 "vectors");
6647 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6648 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6649 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6650 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6651 } else {
6652 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6653 MagicFactor = MagicFactors[0];
6654 Factor = Factors[0];
6655 Shift = Shifts[0];
6656 ShiftMask = ShiftMasks[0];
6657 }
6658
6659 // Multiply the numerator (operand 0) by the magic value.
6660 // FIXME: We should support doing a MUL in a wider type.
6661 auto GetMULHS = [&](SDValue X, SDValue Y) {
6662 // If the type isn't legal, use a wider mul of the type calculated
6663 // earlier.
6664 if (!isTypeLegal(VT)) {
6665 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6666 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6667 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6668 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6669 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6670 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6671 }
6672
6673 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6674 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6675 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6676 SDValue LoHi =
6677 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6678 return SDValue(LoHi.getNode(), 1);
6679 }
6680 // If a type twice as wide is legal, widen and use a mul plus a shift.
6681 unsigned Size = VT.getScalarSizeInBits();
6682 EVT WideVT = VT.changeElementType(
6683 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), Size * 2));
6684 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6685 // custom lowered. This is very expensive so avoid it at all costs for
6686 // constant divisors.
6687 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6688 isOperationLegalOrCustom(ISD::MUL, WideVT)) ||
6689 isOperationLegal(ISD::MUL, WideVT)) {
6690 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6691 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6692 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6693 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6694 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6695 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6696 }
6697 return SDValue();
6698 };
6699
6700 SDValue Q = GetMULHS(N0, MagicFactor);
6701 if (!Q)
6702 return SDValue();
6703
6704 Created.push_back(Q.getNode());
6705
6706 // (Optionally) Add/subtract the numerator using Factor.
6707 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6708 Created.push_back(Factor.getNode());
6709 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6710 Created.push_back(Q.getNode());
6711
6712 // Shift right algebraic by shift value.
6713 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6714 Created.push_back(Q.getNode());
6715
6716 // Extract the sign bit, mask it and add it to the quotient.
6717 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6718 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6719 Created.push_back(T.getNode());
6720 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6721 Created.push_back(T.getNode());
6722 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6723}
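
// Worked example (illustrative): for 'sdiv i32 %n, 7' the magic algorithm
// yields Magic == 0x92492493 and ShiftAmount == 2. The magic constant is
// negative while the divisor is positive, so Factor == 1 adds the numerator
// back, giving roughly:
//   q = mulhs(n, 0x92492493) + n;
//   q = q >> 2;                 // arithmetic shift by ShiftAmount
//   q += (unsigned)q >> 31;     // add the masked sign bit to round to zero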
6724
6725/// Given an ISD::UDIV node expressing a divide by constant,
6726/// return a DAG expression to select that will generate the same value by
6727/// multiplying by a magic number.
6728/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6729SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6730 bool IsAfterLegalization,
6731 bool IsAfterLegalTypes,
6732 SmallVectorImpl<SDNode *> &Created) const {
6733 SDLoc dl(N);
6734 EVT VT = N->getValueType(0);
6735 EVT SVT = VT.getScalarType();
6736 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6737 EVT ShSVT = ShVT.getScalarType();
6738 unsigned EltBits = VT.getScalarSizeInBits();
6739 EVT MulVT;
6740
6741 // Check to see if we can do this.
6742 // FIXME: We should be more aggressive here.
6743 if (!isTypeLegal(VT)) {
6744 // Limit this to simple scalars for now.
6745 if (VT.isVector() || !VT.isSimple())
6746 return SDValue();
6747
6748 // If this type will be promoted to a large enough type with a legal
6749 // multiply operation, we can go ahead and do this transform.
6750 if (getTypeAction(*DAG.getContext(), VT) != TypePromoteInteger)
6751 return SDValue();
6752
6753 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6754 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6755 !isOperationLegal(ISD::MUL, MulVT))
6756 return SDValue();
6757 }
6758
6759 // If the udiv has an 'exact' bit we can use a simpler lowering.
6760 if (N->getFlags().hasExact())
6761 return BuildExactUDIV(*this, N, dl, DAG, Created);
6762
6763 SDValue N0 = N->getOperand(0);
6764 SDValue N1 = N->getOperand(1);
6765
6766 // Try to use leading zeros of the dividend to reduce the multiplier and
6767 // avoid expensive fixups.
6768 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6769
6770 // If we're after type legalization and SVT is not legal, use the
6771 // promoted type for creating constants to avoid creating nodes with
6772 // illegal types.
6773 if (IsAfterLegalTypes && VT.isVector()) {
6774 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6775 if (SVT.bitsLT(VT.getScalarType()))
6776 return SDValue();
6777 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6778 if (ShSVT.bitsLT(ShVT.getScalarType()))
6779 return SDValue();
6780 }
6781 const unsigned SVTBits = SVT.getSizeInBits();
6782
6783 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6784 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6785
6786 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6787 if (C->isZero())
6788 return false;
6789 // Truncate the divisor to the target scalar type in case it was promoted
6790 // during type legalization.
6791 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6792
6793 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6794
6795 // Magic algorithm doesn't work for division by 1. We need to emit a select
6796 // at the end.
6797 if (Divisor.isOne()) {
6798 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6799 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6800 } else {
6801 UnsignedDivisionByConstantInfo magics =
6802 UnsignedDivisionByConstantInfo::get(
6803 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6804
6805 MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
6806
6807 assert(magics.PreShift < Divisor.getBitWidth() &&
6808 "We shouldn't generate an undefined shift!");
6809 assert(magics.PostShift < Divisor.getBitWidth() &&
6810 "We shouldn't generate an undefined shift!");
6811 assert((!magics.IsAdd || magics.PreShift == 0) &&
6812 "Unexpected pre-shift");
6813 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6814 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6815 NPQFactor = DAG.getConstant(
6816 magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6817 : APInt::getZero(SVTBits),
6818 dl, SVT);
6819 UseNPQ |= magics.IsAdd;
6820 UsePreShift |= magics.PreShift != 0;
6821 UsePostShift |= magics.PostShift != 0;
6822 }
6823
6824 PreShifts.push_back(PreShift);
6825 MagicFactors.push_back(MagicFactor);
6826 NPQFactors.push_back(NPQFactor);
6827 PostShifts.push_back(PostShift);
6828 return true;
6829 };
6830
6831 // Collect the shifts/magic values from each element.
6832 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6833 /*AllowTruncation=*/true))
6834 return SDValue();
6835
6836 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6837 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6838 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6839 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6840 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6841 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6842 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6843 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6844 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6845 "Expected matchUnaryPredicate to return one for scalable vectors");
6846 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6847 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6848 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6849 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6850 } else {
6851 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6852 PreShift = PreShifts[0];
6853 MagicFactor = MagicFactors[0];
6854 PostShift = PostShifts[0];
6855 }
6856
6857 SDValue Q = N0;
6858 if (UsePreShift) {
6859 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6860 Created.push_back(Q.getNode());
6861 }
6862
6863 // FIXME: We should support doing a MUL in a wider type.
6864 auto GetMULHU = [&](SDValue X, SDValue Y) {
6865 // If the type isn't legal, use a wider mul of the type calculated
6866 // earlier.
6867 if (!isTypeLegal(VT)) {
6868 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6869 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6870 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6871 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6872 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6873 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6874 }
6875
6876 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6877 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6878 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6879 SDValue LoHi =
6880 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6881 return SDValue(LoHi.getNode(), 1);
6882 }
6883 // If a type twice as wide is legal, widen and use a mul plus a shift.
6884 unsigned Size = VT.getScalarSizeInBits();
6885 EVT WideVT = VT.changeElementType(
6886 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), Size * 2));
6887 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6888 // custom lowered. This is very expensive so avoid it at all costs for
6889 // constant divisors.
6890 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6891 isOperationLegalOrCustom(ISD::MUL, WideVT)) ||
6892 isOperationLegal(ISD::MUL, WideVT)) {
6893 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6894 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6895 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6896 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6897 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6898 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6899 }
6900 return SDValue(); // No mulhu or equivalent
6901 };
6902
6903 // Multiply the numerator (operand 0) by the magic value.
6904 Q = GetMULHU(Q, MagicFactor);
6905 if (!Q)
6906 return SDValue();
6907
6908 Created.push_back(Q.getNode());
6909
6910 if (UseNPQ) {
6911 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6912 Created.push_back(NPQ.getNode());
6913
6914 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6915 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6916 if (VT.isVector())
6917 NPQ = GetMULHU(NPQ, NPQFactor);
6918 else
6919 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6920
6921 Created.push_back(NPQ.getNode());
6922
6923 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6924 Created.push_back(Q.getNode());
6925 }
6926
6927 if (UsePostShift) {
6928 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6929 Created.push_back(Q.getNode());
6930 }
6931
6932 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6933
6934 SDValue One = DAG.getConstant(1, dl, VT);
6935 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6936 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6937}
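
// Worked example (illustrative): for 'udiv i32 %n, 7' the true multiplier
// ceil(2^35 / 7) == 0x124924925 needs 33 bits, so IsAdd is set and the NPQ
// ("numerator plus quotient") fixup above supplies the 33rd bit:
//   q   = mulhu(n, 0x24924925);
//   npq = (n - q) >> 1;
//   q   = (npq + q) >> 2;       // PostShift == 2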
6938
6939/// If all values in Values that *don't* match the predicate are the same 'splat'
6940/// value, then replace all values with that splat value.
6941/// Else, if AlternativeReplacement was provided, then replace all values that
6942/// do match predicate with AlternativeReplacement value.
6943static void
6944turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6945 std::function<bool(SDValue)> Predicate,
6946 SDValue AlternativeReplacement = SDValue()) {
6947 SDValue Replacement;
6948 // Is there a value for which the Predicate does *NOT* match? What is it?
6949 auto SplatValue = llvm::find_if_not(Values, Predicate);
6950 if (SplatValue != Values.end()) {
6951 // Does Values consist only of SplatValue's and values matching Predicate?
6952 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6953 return Value == *SplatValue || Predicate(Value);
6954 })) // Then we shall replace values matching predicate with SplatValue.
6955 Replacement = *SplatValue;
6956 }
6957 if (!Replacement) {
6958 // Oops, we did not find the "baseline" splat value.
6959 if (!AlternativeReplacement)
6960 return; // Nothing to do.
6961 // Let's replace with provided value then.
6962 Replacement = AlternativeReplacement;
6963 }
6964 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6965}
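
// E.g. with Values == {5, 0, 5, 5} and Predicate == isNullConstant, the
// lane holding 0 is rewritten to 5, producing the splat {5, 5, 5, 5} so the
// BUILD_VECTORs assembled by the callers become cheap splats.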
6966
6967/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6968/// where the divisor is constant and the comparison target is zero,
6969/// return a DAG expression that will generate the same comparison result
6970/// using only multiplications, additions and shifts/rotations.
6971/// Ref: "Hacker's Delight" 10-17.
6972SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6973 SDValue CompTargetNode,
6974 ISD::CondCode Cond,
6975 DAGCombinerInfo &DCI,
6976 const SDLoc &DL) const {
6977 SmallVector<SDNode *, 5> Built;
6978 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6979 DCI, DL, Built)) {
6980 for (SDNode *N : Built)
6981 DCI.AddToWorklist(N);
6982 return Folded;
6983 }
6984
6985 return SDValue();
6986}
6987
6988SDValue
6989TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6990 SDValue CompTargetNode, ISD::CondCode Cond,
6991 DAGCombinerInfo &DCI, const SDLoc &DL,
6992 SmallVectorImpl<SDNode *> &Created) const {
6993 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6994 // - D must be constant, with D = D0 * 2^K where D0 is odd
6995 // - P is the multiplicative inverse of D0 modulo 2^W
6996 // - Q = floor(((2^W) - 1) / D)
6997 // where W is the width of the common type of N and D.
6998 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6999 "Only applicable for (in)equality comparisons.");
7000
7001 SelectionDAG &DAG = DCI.DAG;
7002
7003 EVT VT = REMNode.getValueType();
7004 EVT SVT = VT.getScalarType();
7005 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7006 EVT ShSVT = ShVT.getScalarType();
7007
7008 // If MUL is unavailable, we cannot proceed in any case.
7009 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7010 return SDValue();
7011
7012 bool ComparingWithAllZeros = true;
7013 bool AllComparisonsWithNonZerosAreTautological = true;
7014 bool HadTautologicalLanes = false;
7015 bool AllLanesAreTautological = true;
7016 bool HadEvenDivisor = false;
7017 bool AllDivisorsArePowerOfTwo = true;
7018 bool HadTautologicalInvertedLanes = false;
7019 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7020
7021 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7022 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7023 if (CDiv->isZero())
7024 return false;
7025
7026 const APInt &D = CDiv->getAPIntValue();
7027 const APInt &Cmp = CCmp->getAPIntValue();
7028
7029 ComparingWithAllZeros &= Cmp.isZero();
7030
7031 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7032 // if C2 is not less than C1, the comparison is always false.
7033 // But we will only be able to produce the comparison that will give the
7034 // opposite tautological answer. So this lane would need to be fixed up.
7035 bool TautologicalInvertedLane = D.ule(Cmp);
7036 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7037
7038 // If all lanes are tautological (either all divisors are ones, or divisor
7039 // is not greater than the constant we are comparing with),
7040 // we will prefer to avoid the fold.
7041 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7042 HadTautologicalLanes |= TautologicalLane;
7043 AllLanesAreTautological &= TautologicalLane;
7044
7045 // If we are comparing with non-zero, we'll need to subtract said
7046 // comparison value from the LHS. But there is no point in doing that if
7047 // every lane where we are comparing with non-zero is tautological.
7048 if (!Cmp.isZero())
7049 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7050
7051 // Decompose D into D0 * 2^K
7052 unsigned K = D.countr_zero();
7053 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7054 APInt D0 = D.lshr(K);
7055
7056 // D is even if it has trailing zeros.
7057 HadEvenDivisor |= (K != 0);
7058 // D is a power-of-two if D0 is one.
7059 // If all divisors are power-of-two, we will prefer to avoid the fold.
7060 AllDivisorsArePowerOfTwo &= D0.isOne();
7061
7062 // P = inv(D0, 2^W)
7063 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7064 unsigned W = D.getBitWidth();
7065 APInt P = D0.multiplicativeInverse();
7066 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7067
7068 // Q = floor((2^W - 1) u/ D)
7069 // R = ((2^W - 1) u% D)
7070 APInt Q, R;
7071 APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
7072
7073 // If we are comparing with zero, then that comparison constant is okay,
7074 // else it may need to be one less than that.
7075 if (Cmp.ugt(R))
7076 Q -= 1;
7077
7078 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7079 "We are expecting that K is always less than all-ones for ShSVT");
7080
7081 // If the lane is tautological the result can be constant-folded.
7082 if (TautologicalLane) {
7083 // Set P and K to bogus values so we can try to splat them.
7084 P = 0;
7085 K = -1;
7086 // And ensure that the comparison constant is tautological,
7087 // i.e. it will always compare true/false.
7088 Q = -1;
7089 }
7090
7091 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7092 KAmts.push_back(
7093 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7094 /*implicitTrunc=*/true),
7095 DL, ShSVT));
7096 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7097 return true;
7098 };
7099
7100 SDValue N = REMNode.getOperand(0);
7101 SDValue D = REMNode.getOperand(1);
7102
7103 // Collect the values from each element.
7104 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7105 return SDValue();
7106
7107 // If all lanes are tautological, the result can be constant-folded.
7108 if (AllLanesAreTautological)
7109 return SDValue();
7110
7111 // If this is a urem by a power-of-two, avoid the fold since it can be
7112 // best implemented as a bit test.
7113 if (AllDivisorsArePowerOfTwo)
7114 return SDValue();
7115
7116 SDValue PVal, KVal, QVal;
7117 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7118 if (HadTautologicalLanes) {
7119 // Try to turn PAmts into a splat, since we don't care about the values
7120 // that are currently '0'. If we can't, just keep '0's.
7121 turnVectorIntoSplatVector(PAmts, isNullConstant);
7122 // Try to turn KAmts into a splat, since we don't care about the values
7123 // that are currently '-1'. If we can't, change them to '0's.
7124 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7125 DAG.getConstant(0, DL, ShSVT));
7126 }
7127
7128 PVal = DAG.getBuildVector(VT, DL, PAmts);
7129 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7130 QVal = DAG.getBuildVector(VT, DL, QAmts);
7131 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7132 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7133 "Expected matchBinaryPredicate to return one element for "
7134 "SPLAT_VECTORs");
7135 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7136 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7137 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7138 } else {
7139 PVal = PAmts[0];
7140 KVal = KAmts[0];
7141 QVal = QAmts[0];
7142 }
7143
7144 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7145 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7146 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7147 assert(CompTargetNode.getValueType() == N.getValueType() &&
7148 "Expecting that the types on LHS and RHS of comparisons match.");
7149 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7150 }
7151
7152 // (mul N, P)
7153 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7154 Created.push_back(Op0.getNode());
7155
7156 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7157 // divisors as a performance improvement, since rotating by 0 is a no-op.
7158 if (HadEvenDivisor) {
7159 // We need ROTR to do this.
7160 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7161 return SDValue();
7162 // UREM: (rotr (mul N, P), K)
7163 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7164 Created.push_back(Op0.getNode());
7165 }
7166
7167 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7168 SDValue NewCC =
7169 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7170 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7171 if (!HadTautologicalInvertedLanes)
7172 return NewCC;
7173
7174 // If any lanes previously compared always-false, the NewCC will give
7175 // an always-true result for them, so we need to fix up those lanes.
7176 // Or the other way around for the inequality predicate.
7177 assert(VT.isVector() && "Can/should only get here for vectors.");
7178 Created.push_back(NewCC.getNode());
7179
7180 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7181 // if C2 is not less than C1, the comparison is always false.
7182 // But we have produced the comparison that will give the
7183 // opposite tautological answer. So these lanes would need to be fixed up.
7184 SDValue TautologicalInvertedChannels =
7185 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7186 Created.push_back(TautologicalInvertedChannels.getNode());
7187
7188 // NOTE: we avoid letting illegal types through even if we're before legalize
7189 // ops -- legalization has a hard time producing good code for this.
7190 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7191 // If we have a vector select, let's replace the comparison results in the
7192 // affected lanes with the correct tautological result.
7193 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7194 DL, SETCCVT, SETCCVT);
7195 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7196 Replacement, NewCC);
7197 }
7198
7199 // Else, we can just invert the comparison result in the appropriate lanes.
7200 //
7201 // NOTE: see the note above VSELECT above.
7202 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7203 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7204 TautologicalInvertedChannels);
7205
7206 return SDValue(); // Don't know how to lower.
7207}
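
// Worked example (illustrative): '(urem i32 %x, 6) == 0' decomposes as
// D0 = 3, K = 1, giving P = inv(3) mod 2^32 = 0xAAAAAAAB and
// Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA, so the fold emits
//   (rotr (mul %x, 0xAAAAAAAB), 1) ule 0x2AAAAAAA
// e.g. x = 6: 6 * P == 2 (mod 2^32) and rotr(2, 1) == 1 u<= Q, i.e. true.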
7208
7209/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7210/// where the divisor is constant and the comparison target is zero,
7211/// return a DAG expression that will generate the same comparison result
7212/// using only multiplications, additions and shifts/rotations.
7213/// Ref: "Hacker's Delight" 10-17.
7214SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7215 SDValue CompTargetNode,
7216 ISD::CondCode Cond,
7217 DAGCombinerInfo &DCI,
7218 const SDLoc &DL) const {
7219 SmallVector<SDNode *, 7> Built;
7220 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7221 DCI, DL, Built)) {
7222 assert(Built.size() <= 7 && "Max size prediction failed.");
7223 for (SDNode *N : Built)
7224 DCI.AddToWorklist(N);
7225 return Folded;
7226 }
7227
7228 return SDValue();
7229}
7230
7231SDValue
7232TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7233 SDValue CompTargetNode, ISD::CondCode Cond,
7234 DAGCombinerInfo &DCI, const SDLoc &DL,
7235 SmallVectorImpl<SDNode *> &Created) const {
7236 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7237 // Fold:
7238 // (seteq/ne (srem N, D), 0)
7239 // To:
7240 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7241 //
7242 // - D must be constant, with D = D0 * 2^K where D0 is odd
7243 // - P is the multiplicative inverse of D0 modulo 2^W
7244 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7245 // - Q = floor((2 * A) / (2^K))
7246 // where W is the width of the common type of N and D.
7247 //
7248 // When D is a power of two (and thus D0 is 1), the normal
7249 // formulas for A and Q don't apply, because the derivation
7250 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7251 // does not apply. This specifically fails when N = INT_MIN.
7252 //
7253 // Instead, for power-of-two D, we use:
7254 // - A = 2^(W-1)
7255 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7256 // - Q = 2^(W-K) - 1
7257 // |-> Test that the top K bits are zero after rotation
7258 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7259 "Only applicable for (in)equality comparisons.");
7260
7261 SelectionDAG &DAG = DCI.DAG;
7262
7263 EVT VT = REMNode.getValueType();
7264 EVT SVT = VT.getScalarType();
7265 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7266 EVT ShSVT = ShVT.getScalarType();
7267
7268 // If we are after ops legalization, and MUL is unavailable, we cannot
7269 // proceed.
7270 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7271 return SDValue();
7272
7273 // TODO: Could support comparing with non-zero too.
7274 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7275 if (!CompTarget || !CompTarget->isZero())
7276 return SDValue();
7277
7278 bool HadIntMinDivisor = false;
7279 bool HadOneDivisor = false;
7280 bool AllDivisorsAreOnes = true;
7281 bool HadEvenDivisor = false;
7282 bool NeedToApplyOffset = false;
7283 bool AllDivisorsArePowerOfTwo = true;
7284 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7285
7286 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7287 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7288 if (C->isZero())
7289 return false;
7290
7291 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7292
7293 // WARNING: this fold is only valid for positive divisors!
7294 APInt D = C->getAPIntValue();
7295 if (D.isNegative())
7296 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7297
7298 HadIntMinDivisor |= D.isMinSignedValue();
7299
7300 // If all divisors are ones, we will prefer to avoid the fold.
7301 HadOneDivisor |= D.isOne();
7302 AllDivisorsAreOnes &= D.isOne();
7303
7304 // Decompose D into D0 * 2^K
7305 unsigned K = D.countr_zero();
7306 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7307 APInt D0 = D.lshr(K);
7308
7309 if (!D.isMinSignedValue()) {
7310 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7311 // we don't care about this lane in this fold, we'll special-handle it.
7312 HadEvenDivisor |= (K != 0);
7313 }
7314
7315 // D is a power-of-two if D0 is one. This includes INT_MIN.
7316 // If all divisors are power-of-two, we will prefer to avoid the fold.
7317 AllDivisorsArePowerOfTwo &= D0.isOne();
7318
7319 // P = inv(D0, 2^W)
7320 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7321 unsigned W = D.getBitWidth();
7322 APInt P = D0.multiplicativeInverse();
7323 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7324
7325 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7326 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7327 A.clearLowBits(K);
7328
7329 if (!D.isMinSignedValue()) {
7330 // If divisor INT_MIN, then we don't care about this lane in this fold,
7331 // we'll special-handle it.
7332 NeedToApplyOffset |= A != 0;
7333 }
7334
7335 // Q = floor((2 * A) / (2^K))
7336 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7337
7338 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
7339 "We are expecting that A is always less than all-ones for SVT");
7340 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
7341 "We are expecting that K is always less than all-ones for ShSVT");
7342
7343 // If D was a power of two, apply the alternate constant derivation.
7344 if (D0.isOne()) {
7345 // A = 2^(W-1)
7346 A = APInt::getSignedMinValue(W);
7347 // - Q = 2^(W-K) - 1
7348 Q = APInt::getAllOnes(W - K).zext(W);
7349 }
7350
7351 // If the divisor is 1 the result can be constant-folded. Likewise, we
7352 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7353 if (D.isOne()) {
7354 // Set P, A and K to bogus values so we can try to splat them.
7355 P = 0;
7356 A = -1;
7357 K = -1;
7358
7359 // x ?% 1 == 0 <--> true <--> x u<= -1
7360 Q = -1;
7361 }
7362
7363 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7364 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7365 KAmts.push_back(
7366 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7367 /*implicitTrunc=*/true),
7368 DL, ShSVT));
7369 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7370 return true;
7371 };
7372
7373 SDValue N = REMNode.getOperand(0);
7374 SDValue D = REMNode.getOperand(1);
7375
7376 // Collect the values from each element.
7377 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7378 return SDValue();
7379
7380 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7381 if (AllDivisorsAreOnes)
7382 return SDValue();
7383
7384 // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
7385 // since it can be best implemented as a bit test.
7386 if (AllDivisorsArePowerOfTwo)
7387 return SDValue();
7388
7389 SDValue PVal, AVal, KVal, QVal;
7390 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7391 if (HadOneDivisor) {
7392 // Try to turn PAmts into a splat, since we don't care about the values
7393 // that are currently '0'. If we can't, just keep '0's.
7394 turnVectorIntoSplatVector(PAmts, isNullConstant);
7395 // Try to turn AAmts into a splat, since we don't care about the
7396 // values that are currently '-1'. If we can't, change them to '0's.
7397 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
7398 DAG.getConstant(0, DL, SVT));
7399 // Try to turn KAmts into a splat, since we don't care about the values
7400 // that are currently '-1'. If we can't, change them to '0's.
7401 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
7402 DAG.getConstant(0, DL, ShSVT));
7403 }
7404
7405 PVal = DAG.getBuildVector(VT, DL, PAmts);
7406 AVal = DAG.getBuildVector(VT, DL, AAmts);
7407 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7408 QVal = DAG.getBuildVector(VT, DL, QAmts);
7409 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7410 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7411 QAmts.size() == 1 &&
7412 "Expected matchUnaryPredicate to return one element for scalable "
7413 "vectors");
7414 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7415 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7416 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7417 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7418 } else {
7419 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7420 PVal = PAmts[0];
7421 AVal = AAmts[0];
7422 KVal = KAmts[0];
7423 QVal = QAmts[0];
7424 }
7425
7426 // (mul N, P)
7427 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7428 Created.push_back(Op0.getNode());
7429
7430 if (NeedToApplyOffset) {
7431 // We need ADD to do this.
7432 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7433 return SDValue();
7434
7435 // (add (mul N, P), A)
7436 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7437 Created.push_back(Op0.getNode());
7438 }
7439
7440 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7441 // divisors as a performance improvement, since rotating by 0 is a no-op.
7442 if (HadEvenDivisor) {
7443 // We need ROTR to do this.
7444 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7445 return SDValue();
7446 // SREM: (rotr (add (mul N, P), A), K)
7447 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7448 Created.push_back(Op0.getNode());
7449 }
7450
7451 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7452 SDValue Fold =
7453 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7455
7456 // If we didn't have lanes with INT_MIN divisor, then we're done.
7457 if (!HadIntMinDivisor)
7458 return Fold;
7459
7460 // That fold is only valid for positive divisors, which effectively means
7461 // it is invalid for INT_MIN divisors. So if we have such a lane,
7462 // we must fix up the results for said lanes.
7463 assert(VT.isVector() && "Can/should only get here for vectors.");
7464
7465 // NOTE: we avoid letting illegal types through even if we're before legalize
7466 // ops -- legalization has a hard time producing good code for the code that
7467 // follows.
7468 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7469 !isOperationLegalOrCustom(ISD::AND, VT) ||
7470 !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7471 !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7472 return SDValue();
7473
7474 Created.push_back(Fold.getNode());
7475
7476 SDValue IntMin = DAG.getConstant(
7477 APInt::getSignedMinValue(SVT.getSizeInBits()), DL, VT);
7478 SDValue IntMax = DAG.getConstant(
7479 APInt::getSignedMaxValue(SVT.getSizeInBits()), DL, VT);
7480 SDValue Zero =
7481 DAG.getConstant(APInt::getZero(SVT.getSizeInBits()), DL, VT);
7482
7483 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7484 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7485 Created.push_back(DivisorIsIntMin.getNode());
7486
7487 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7488 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7489 Created.push_back(Masked.getNode());
7490 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7491 Created.push_back(MaskedIsZero.getNode());
7492
7493 // To produce the final result we need to blend 2 vectors: 'Fold' and
7494 // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
7495 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7496 // constant-folded, the select can get lowered to a shuffle with a constant mask.
7497 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7498 MaskedIsZero, Fold);
7499
7500 return Blended;
7501}
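
// Worked example (illustrative): '(srem i32 %x, 3) == 0' gives D0 = 3,
// K = 0, P = 0xAAAAAAAB, A = floor((2^31 - 1) / 3) = 0x2AAAAAAA and
// Q = 2 * A = 0x55555554, so the fold emits
//   (add (mul %x, 0xAAAAAAAB), 0x2AAAAAAA) ule 0x55555554
// The offset A maps the signed range onto an unsigned one so that exactly
// the multiples of 3 land in [0, Q]; no rotate is needed since K == 0.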
7502
7503SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7504 const DenormalMode &Mode) const {
7505 SDLoc DL(Op);
7506 EVT VT = Op.getValueType();
7507 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7508 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7509
7510 // This is specifically a check for the handling of denormal inputs, not the
7511 // result.
7512 if (Mode.Input == DenormalMode::PreserveSign ||
7513 Mode.Input == DenormalMode::PositiveZero) {
7514 // Test = X == 0.0
7515 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7516 }
7517
7518 // Test for denormal inputs to avoid a wrong estimate.
7519 //
7520 // Test = fabs(X) < SmallestNormal
7521 const fltSemantics &FltSem = VT.getFltSemantics();
7522 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7523 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7524 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7525 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7526}
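
// E.g. for f32 under IEEE denormal handling this emits the equivalent of
//   setcc (fabs %x), 0x1.0p-126, setlt
// (a compare against the smallest normalized float), while with denormals
// flushed to zero the cheaper '%x == 0.0' test above suffices.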
7527
7528SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7529 bool LegalOps, bool OptForSize,
7530 NegatibleCost &Cost,
7531 unsigned Depth) const {
7532 // fneg is removable even if it has multiple uses.
7533 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7534 Cost = NegatibleCost::Cheaper;
7535 return Op.getOperand(0);
7536 }
7537
7538 // Don't recurse exponentially.
7539 if (Depth > SelectionDAG::MaxRecursionDepth)
7540 return SDValue();
7541
7542 // Pre-increment recursion depth for use in recursive calls.
7543 ++Depth;
7544 const SDNodeFlags Flags = Op->getFlags();
7545 EVT VT = Op.getValueType();
7546 unsigned Opcode = Op.getOpcode();
7547
7548 // Don't allow anything with multiple uses unless we know it is free.
7549 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7550 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7551 isFPExtFree(VT, Op.getOperand(0).getValueType());
7552 if (!IsFreeExtend)
7553 return SDValue();
7554 }
7555
7556 auto RemoveDeadNode = [&](SDValue N) {
7557 if (N && N.getNode()->use_empty())
7558 DAG.RemoveDeadNode(N.getNode());
7559 };
7560
7561 SDLoc DL(Op);
7562
7563 // Because getNegatedExpression can delete nodes we need a handle to keep
7564 // temporary nodes alive in case the recursion manages to create an identical
7565 // node.
7566 std::list<HandleSDNode> Handles;
7567
7568 switch (Opcode) {
7569 case ISD::ConstantFP: {
7570 // Don't invert constant FP values after legalization unless the target says
7571 // the negated constant is legal.
7572 bool IsOpLegal =
7573 isOperationLegal(ISD::ConstantFP, VT) ||
7574 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7575 OptForSize);
7576
7577 if (LegalOps && !IsOpLegal)
7578 break;
7579
7580 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7581 V.changeSign();
7582 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7583
7584 // If we already have the use of the negated floating constant, it is free
7585 // to negate it even if it has multiple uses.
7586 if (!Op.hasOneUse() && CFP.use_empty())
7587 break;
7588 Cost = NegatibleCost::Neutral;
7589 return CFP;
7590 }
7591 case ISD::SPLAT_VECTOR: {
7592 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7593 SDValue X = Op.getOperand(0);
7594 if (!X.hasOneUse())
7595 break;
7596
7597 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
7598 if (!NegX)
7599 break;
7600 Cost = NegatibleCost::Cheaper;
7601 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
7602 }
7603 case ISD::BUILD_VECTOR: {
7604 // Only permit BUILD_VECTOR of constants.
7605 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7606 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7607 }))
7608 break;
7609
7610 bool IsOpLegal =
7611 (isOperationLegal(ISD::ConstantFP, VT) &&
7612 isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
7613 llvm::all_of(Op->op_values(), [&](SDValue N) {
7614 return N.isUndef() ||
7615 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7616 OptForSize);
7617 });
7618
7619 if (LegalOps && !IsOpLegal)
7620 break;
7621
7622 SmallVector<SDValue, 4> Ops;
7623 for (SDValue C : Op->op_values()) {
7624 if (C.isUndef()) {
7625 Ops.push_back(C);
7626 continue;
7627 }
7628 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7629 V.changeSign();
7630 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7631 }
7632 Cost = NegatibleCost::Neutral;
7633 return DAG.getBuildVector(VT, DL, Ops);
7634 }
7635 case ISD::FADD: {
7636 if (!Flags.hasNoSignedZeros())
7637 break;
7638
7639 // After operation legalization, it might not be legal to create new FSUBs.
7640 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7641 break;
7642 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7643
7644 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7645 NegatibleCost CostX = NegatibleCost::Expensive;
7646 SDValue NegX =
7647 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7648 // Prevent this node from being deleted by the next call.
7649 if (NegX)
7650 Handles.emplace_back(NegX);
7651
7652 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7653 NegatibleCost CostY = NegatibleCost::Expensive;
7654 SDValue NegY =
7655 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7656
7657 // We're done with the handles.
7658 Handles.clear();
7659
7660 // Negate X if its cost is less than or equal to the cost of negating Y.
7661 if (NegX && (CostX <= CostY)) {
7662 Cost = CostX;
7663 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7664 if (NegY != N)
7665 RemoveDeadNode(NegY);
7666 return N;
7667 }
7668
7669 // Negate the Y if it is not expensive.
7670 if (NegY) {
7671 Cost = CostY;
7672 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7673 if (NegX != N)
7674 RemoveDeadNode(NegX);
7675 return N;
7676 }
7677 break;
7678 }
7679 case ISD::FSUB: {
7680 // We can't turn -(A-B) into B-A when we honor signed zeros.
7681 if (!Flags.hasNoSignedZeros())
7682 break;
7683
7684 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7685 // fold (fneg (fsub 0, Y)) -> Y
7686 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7687 if (C->isZero()) {
7688 Cost = NegatibleCost::Cheaper;
7689 return Y;
7690 }
7691
7692 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7693 Cost = NegatibleCost::Neutral;
7694 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7695 }
7696 case ISD::FMUL:
7697 case ISD::FDIV: {
7698 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7699
7700 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7701 NegatibleCost CostX = NegatibleCost::Expensive;
7702 SDValue NegX =
7703 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7704 // Prevent this node from being deleted by the next call.
7705 if (NegX)
7706 Handles.emplace_back(NegX);
7707
7708 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7709 NegatibleCost CostY = NegatibleCost::Expensive;
7710 SDValue NegY =
7711 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7712
7713 // We're done with the handles.
7714 Handles.clear();
7715
7716 // Negate X if its cost is less than or equal to the cost of negating Y.
7717 if (NegX && (CostX <= CostY)) {
7718 Cost = CostX;
7719 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7720 if (NegY != N)
7721 RemoveDeadNode(NegY);
7722 return N;
7723 }
7724
7725 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7726 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7727 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7728 break;
7729
7730 // Negate the Y if it is not expensive.
7731 if (NegY) {
7732 Cost = CostY;
7733 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7734 if (NegX != N)
7735 RemoveDeadNode(NegX);
7736 return N;
7737 }
7738 break;
7739 }
7740 case ISD::FMA:
7741 case ISD::FMULADD:
7742 case ISD::FMAD: {
7743 if (!Flags.hasNoSignedZeros())
7744 break;
7745
7746 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7747 NegatibleCost CostZ = NegatibleCost::Expensive;
7748 SDValue NegZ =
7749 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7750 // Give up if we fail to negate Z.
7751 if (!NegZ)
7752 break;
7753
7754 // Prevent this node from being deleted by the next two calls.
7755 Handles.emplace_back(NegZ);
7756
7757 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7758 NegatibleCost CostX = NegatibleCost::Expensive;
7759 SDValue NegX =
7760 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7761 // Prevent this node from being deleted by the next call.
7762 if (NegX)
7763 Handles.emplace_back(NegX);
7764
7765 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7766 NegatibleCost CostY = NegatibleCost::Expensive;
7767 SDValue NegY =
7768 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7769
7770 // We're done with the handles.
7771 Handles.clear();
7772
7773 // Negate X if its cost is less than or equal to the cost of negating Y.
7774 if (NegX && (CostX <= CostY)) {
7775 Cost = std::min(CostX, CostZ);
7776 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7777 if (NegY != N)
7778 RemoveDeadNode(NegY);
7779 return N;
7780 }
7781
7782 // Negate the Y if it is not expensive.
7783 if (NegY) {
7784 Cost = std::min(CostY, CostZ);
7785 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7786 if (NegX != N)
7787 RemoveDeadNode(NegX);
7788 return N;
7789 }
7790 break;
7791 }
7792
7793 case ISD::FP_EXTEND:
7794 case ISD::FSIN:
7795 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7796 OptForSize, Cost, Depth))
7797 return DAG.getNode(Opcode, DL, VT, NegV);
7798 break;
7799 case ISD::FP_ROUND:
7800 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7801 OptForSize, Cost, Depth))
7802 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7803 break;
7804 case ISD::SELECT:
7805 case ISD::VSELECT: {
7806 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7807 // iff at least one cost is cheaper and the other is neutral/cheaper
7808 SDValue LHS = Op.getOperand(1);
7809 NegatibleCost CostLHS = NegatibleCost::Expensive;
7810 SDValue NegLHS =
7811 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7812 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7813 RemoveDeadNode(NegLHS);
7814 break;
7815 }
7816
7817 // Prevent this node from being deleted by the next call.
7818 Handles.emplace_back(NegLHS);
7819
7820 SDValue RHS = Op.getOperand(2);
7821 NegatibleCost CostRHS = NegatibleCost::Expensive;
7822 SDValue NegRHS =
7823 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7824
7825 // We're done with the handles.
7826 Handles.clear();
7827
7828 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7829 (CostLHS != NegatibleCost::Cheaper &&
7830 CostRHS != NegatibleCost::Cheaper)) {
7831 RemoveDeadNode(NegLHS);
7832 RemoveDeadNode(NegRHS);
7833 break;
7834 }
7835
7836 Cost = std::min(CostLHS, CostRHS);
7837 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7838 }
7839 }
7840
7841 return SDValue();
7842}
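
// E.g. with the no-signed-zeros flag set, asking to negate
// (fadd X, (fneg Y)) yields (fsub Y, X) with Cost == Cheaper: the inner
// FNEG is stripped for free, which is strictly better than wrapping the
// whole FADD in a new FNEG node.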
7843
7844//===----------------------------------------------------------------------===//
7845// Legalization Utilities
7846//===----------------------------------------------------------------------===//
7847
7848bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7849 SDValue LHS, SDValue RHS,
7850 SmallVectorImpl<SDValue> &Result,
7851 EVT HiLoVT, SelectionDAG &DAG,
7852 MulExpansionKind Kind, SDValue LL,
7853 SDValue LH, SDValue RL, SDValue RH) const {
7854 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7855 Opcode == ISD::SMUL_LOHI);
7856
7857 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7858 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7859 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7860 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7861 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7862 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7863 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7864 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7865
7866 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7867 return false;
7868
7869 unsigned OuterBitSize = VT.getScalarSizeInBits();
7870 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7871
7872 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7873 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7874 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7875
7876 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7877 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7878 bool Signed) -> bool {
7879 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7880 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7881 Hi = SDValue(Lo.getNode(), 1);
7882 return true;
7883 }
7884 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7885 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7886 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7887 return true;
7888 }
7889 return false;
7890 };
7891
7892 SDValue Lo, Hi;
7893
7894 if (!LL.getNode() && !RL.getNode() &&
7895 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7896 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7897 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7898 }
7899
7900 if (!LL.getNode())
7901 return false;
7902
7903 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7904 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7905 DAG.MaskedValueIsZero(RHS, HighMask)) {
7906 // The inputs are both zero-extended.
7907 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7908 Result.push_back(Lo);
7909 Result.push_back(Hi);
7910 if (Opcode != ISD::MUL) {
7911 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7912 Result.push_back(Zero);
7913 Result.push_back(Zero);
7914 }
7915 return true;
7916 }
7917 }
7918
7919 if (!VT.isVector() && Opcode == ISD::MUL &&
7920 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7921 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7922 // The input values are both sign-extended.
7923 // TODO non-MUL case?
7924 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7925 Result.push_back(Lo);
7926 Result.push_back(Hi);
7927 return true;
7928 }
7929 }
7930
7931 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7932 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7933
7934 if (!LH.getNode() && !RH.getNode() &&
7935 isOperationLegalOrCustom(ISD::SRL, VT) &&
7936 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7937 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7938 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7939 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7940 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7941 }
7942
7943 if (!LH.getNode())
7944 return false;
7945
7946 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7947 return false;
7948
7949 Result.push_back(Lo);
7950
7951 if (Opcode == ISD::MUL) {
7952 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7953 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7954 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7955 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7956 Result.push_back(Hi);
7957 return true;
7958 }
7959
7960 // Compute the full width result.
7961 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7962 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7963 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7964 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7965 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7966 };
7967
7968 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7969 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7970 return false;
7971
7972 // This is effectively the add part of a multiply-add of half-sized operands,
7973 // so it cannot overflow.
7974 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7975
7976 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7977 return false;
7978
7979 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7980 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7981
7982 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7983 isOperationLegalOrCustom(ISD::ADDE, VT));
7984 if (UseGlue)
7985 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7986 Merge(Lo, Hi));
7987 else
7988 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7989 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7990
7991 SDValue Carry = Next.getValue(1);
7992 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7993 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7994
7995 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7996 return false;
7997
7998 if (UseGlue)
7999 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
8000 Carry);
8001 else
8002 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
8003 Zero, Carry);
8004
8005 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8006
8007 if (Opcode == ISD::SMUL_LOHI) {
8008 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8009 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
8010 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
8011
8012 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8013 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
8014 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
8015 }
8016
8017 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8018 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8019 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8020 return true;
8021}
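//
// For illustration, the expansion above is the schoolbook identity on
// half-width digits: with a half width of n bits, LHS = LH*2^n + LL and
// RHS = RH*2^n + RL, so
//
//   LHS*RHS = LL*RL + (LL*RH + LH*RL)*2^n + LH*RH*2^(2n)
//
// e.g. with n = 4, 0x27 * 0x31 = 0x777, and the ISD::MUL path computes
//   umul_lohi(0x7, 0x1)     --> Lo = 0x7, Hi = 0x0
//   Hi += (0x7 * 0x3) & 0xF --> Hi = 0x5
//   Hi += (0x2 * 0x1) & 0xF --> Hi = 0x7
// giving the truncated product (Hi << 4) | Lo = 0x77 = 0x777 mod 2^8.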
8022
8023bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
8024 SelectionDAG &DAG, MulExpansionKind Kind,
8025 SDValue LL, SDValue LH, SDValue RL,
8026 SDValue RH) const {
8027 SmallVector<SDValue, 2> Result;
8028 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
8029 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
8030 DAG, Kind, LL, LH, RL, RH);
8031 if (Ok) {
8032 assert(Result.size() == 2);
8033 Lo = Result[0];
8034 Hi = Result[1];
8035 }
8036 return Ok;
8037}
8038
8039// Optimize unsigned division or remainder by constants for types twice as large
8040// as a legal VT.
8041//
8042// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder can be
8043// computed as:
8044//
8045//   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
8046//   Remainder = Sum % Constant
8047// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8048//
8049// For division, we can compute the remainder using the algorithm described
8050// above, subtract it from the dividend to get an exact multiple of Constant.
8051// Then multiply that exact multiple by the multiplicative inverse modulo
8052// (1 << (BitWidth / 2)) to get the quotient.
8053
8054// If Constant is even, we can shift right the dividend and the divisor by the
8055// number of trailing zeros in Constant before applying the remainder algorithm.
8056// If we're after the quotient, we can subtract this value from the shifted
8057// dividend and multiply by the multiplicative inverse of the shifted divisor.
8058// If we want the remainder, we shift the value left by the number of trailing
8059// zeros and add the bits that were shifted out of the dividend.
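//
// Concrete example of the digit-sum step: for BitWidth == 32 and
// Constant == 3, (1 << 16) % 3 == 1, so for N = (Hi << 16) + Lo,
//
//   N % 3 == (Hi * 0xFFFF + Hi + Lo) % 3 == (Hi + Lo) % 3
//
// because 0xFFFF is divisible by 3. A carry out of Hi + Lo stands for a
// wrapped 1 << 16, which is congruent to 1, so it is added back in (the
// end-around carry performed with UADDO/UADDO_CARRY below).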
8060bool TargetLowering::expandDIVREMByConstant(SDNode *N,
8061 SmallVectorImpl<SDValue> &Result,
8062 EVT HiLoVT, SelectionDAG &DAG,
8063 SDValue LL, SDValue LH) const {
8064 unsigned Opcode = N->getOpcode();
8065 EVT VT = N->getValueType(0);
8066
8067 // TODO: Support signed division/remainder.
8068 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8069 return false;
8070 assert(
8071 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8072 "Unexpected opcode");
8073
8074 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8075 if (!CN)
8076 return false;
8077
8078 APInt Divisor = CN->getAPIntValue();
8079 unsigned BitWidth = Divisor.getBitWidth();
8080 unsigned HBitWidth = BitWidth / 2;
8081 assert(VT.getScalarSizeInBits() == BitWidth &&
8082 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8083
8084 // Divisor needs to be less than (1 << HBitWidth).
8085 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8086 if (Divisor.uge(HalfMaxPlus1))
8087 return false;
8088
8089 // We depend on the UREM by constant optimization in DAGCombiner that
8090 // requires a high-half multiply.
8091 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8092 !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
8093 return false;
8094
8095 // Don't expand if optimizing for size.
8096 if (DAG.shouldOptForSize())
8097 return false;
8098
8099 // Early out for 0 or 1 divisors.
8100 if (Divisor.ule(1))
8101 return false;
8102
8103 // If the divisor is even, shift it until it becomes odd.
8104 unsigned TrailingZeros = 0;
8105 if (!Divisor[0]) {
8106 TrailingZeros = Divisor.countr_zero();
8107 Divisor.lshrInPlace(TrailingZeros);
8108 }
8109
8110 SDLoc dl(N);
8111 SDValue Sum;
8112 SDValue PartialRem;
8113
8114 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
8115 // then add in the carry.
8116 // TODO: If we can't split it in half, we might be able to split into 3 or
8117 // more pieces using a smaller bit width.
8118 if (HalfMaxPlus1.urem(Divisor).isOne()) {
8119 assert(!LL == !LH && "Expected both input halves or no input halves!");
8120 if (!LL)
8121 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8122
8123 // Shift the input by the number of TrailingZeros in the divisor. The
8124 // shifted out bits will be added to the remainder later.
8125 if (TrailingZeros) {
8126 // Save the shifted off bits if we need the remainder.
8127 if (Opcode != ISD::UDIV) {
8128 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8129 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8130 DAG.getConstant(Mask, dl, HiLoVT));
8131 }
8132
8133 LL = DAG.getNode(
8134 ISD::OR, dl, HiLoVT,
8135 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8136 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8137 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8138 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8139 HiLoVT, dl)));
8140 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8141 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8142 }
8143
8144 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8145 EVT SetCCType =
8146 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8147 if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
8148 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8149 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8150 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8151 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8152 } else {
8153 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8154 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8155 // If the boolean for the target is 0 or 1, we can add the setcc result
8156 // directly.
8157 if (getBooleanContents(HiLoVT) ==
8158 TargetLoweringBase::ZeroOrOneBooleanContent)
8159 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8160 else
8161 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8162 DAG.getConstant(0, dl, HiLoVT));
8163 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8164 }
8165 }
8166
8167 // If we didn't find a sum, we can't do the expansion.
8168 if (!Sum)
8169 return false;
8170
8171 // Perform a HiLoVT urem on the Sum using the truncated divisor.
8172 SDValue RemL =
8173 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8174 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8175 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8176
8177 if (Opcode != ISD::UREM) {
8178 // Subtract the remainder from the shifted dividend.
8179 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8180 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8181
8182 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8183
8184 // Multiply by the multiplicative inverse of the divisor modulo
8185 // (1 << BitWidth).
8186 APInt MulFactor = Divisor.multiplicativeInverse();
8187
8188 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8189 DAG.getConstant(MulFactor, dl, VT));
8190
8191 // Split the quotient into low and high parts.
8192 SDValue QuotL, QuotH;
8193 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8194 Result.push_back(QuotL);
8195 Result.push_back(QuotH);
8196 }
8197
8198 if (Opcode != ISD::UDIV) {
8199 // If we shifted the input, shift the remainder left and add the bits we
8200 // shifted off the input.
8201 if (TrailingZeros) {
8202 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8203 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8204 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8205 }
8206 Result.push_back(RemL);
8207 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
8208 }
8209
8210 return true;
8211}
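//
// Worked example of the quotient step: for an odd divisor d, multiplication
// by d is invertible modulo 2^BitWidth, and Dividend - Rem is an exact
// multiple of d. E.g. with BitWidth == 8 and d == 5, the inverse is 205
// because 5 * 205 == 1025 == 4 * 256 + 1, so
//   100 / 5 == (100 - 0) * 205 mod 256 == 20500 mod 256 == 20.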
8212
8213// Check that (every element of) Z is undef or not an exact multiple of BW.
8214static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8215 return ISD::matchUnaryPredicate(
8216 Z,
8217 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8218 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8219}
8220
8221static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
8222 EVT VT = Node->getValueType(0);
8223 SDValue ShX, ShY;
8224 SDValue ShAmt, InvShAmt;
8225 SDValue X = Node->getOperand(0);
8226 SDValue Y = Node->getOperand(1);
8227 SDValue Z = Node->getOperand(2);
8228 SDValue Mask = Node->getOperand(3);
8229 SDValue VL = Node->getOperand(4);
8230
8231 unsigned BW = VT.getScalarSizeInBits();
8232 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8233 SDLoc DL(SDValue(Node, 0));
8234
8235 EVT ShVT = Z.getValueType();
8236 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8237 // fshl: X << C | Y >> (BW - C)
8238 // fshr: X << (BW - C) | Y >> C
8239 // where C = Z % BW is not zero
8240 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8241 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8242 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8243 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8244 VL);
8245 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8246 VL);
8247 } else {
8248 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8249 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8250 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8251 if (isPowerOf2_32(BW)) {
8252 // Z % BW -> Z & (BW - 1)
8253 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8254 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8255 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8256 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8257 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8258 } else {
8259 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8260 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8261 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8262 }
8263
8264 SDValue One = DAG.getConstant(1, DL, ShVT);
8265 if (IsFSHL) {
8266 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8267 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8268 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8269 } else {
8270 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8271 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8272 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8273 }
8274 }
8275 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8276}
8277
8278SDValue TargetLowering::expandFunnelShift(SDNode *Node,
8279 SelectionDAG &DAG) const {
8280 if (Node->isVPOpcode())
8281 return expandVPFunnelShift(Node, DAG);
8282
8283 EVT VT = Node->getValueType(0);
8284
8285 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8286 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8287 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8288 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8289 return SDValue();
8290
8291 SDValue X = Node->getOperand(0);
8292 SDValue Y = Node->getOperand(1);
8293 SDValue Z = Node->getOperand(2);
8294
8295 unsigned BW = VT.getScalarSizeInBits();
8296 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8297 SDLoc DL(SDValue(Node, 0));
8298
8299 EVT ShVT = Z.getValueType();
8300
8301 // If a funnel shift in the other direction is more supported, use it.
8302 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8303 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8304 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8305 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8306 // fshl X, Y, Z -> fshr X, Y, -Z
8307 // fshr X, Y, Z -> fshl X, Y, -Z
8308 Z = DAG.getNegative(Z, DL, ShVT);
8309 } else {
8310 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8311 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8312 SDValue One = DAG.getConstant(1, DL, ShVT);
8313 if (IsFSHL) {
8314 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8315 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8316 } else {
8317 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8318 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8319 }
8320 Z = DAG.getNOT(DL, Z, ShVT);
8321 }
8322 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8323 }
8324
8325 SDValue ShX, ShY;
8326 SDValue ShAmt, InvShAmt;
8327 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8328 // fshl: X << C | Y >> (BW - C)
8329 // fshr: X << (BW - C) | Y >> C
8330 // where C = Z % BW is not zero
8331 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8332 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8333 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8334 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8335 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8336 } else {
8337 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8338 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8339 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8340 if (isPowerOf2_32(BW)) {
8341 // Z % BW -> Z & (BW - 1)
8342 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8343 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8344 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8345 } else {
8346 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8347 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8348 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8349 }
8350
8351 SDValue One = DAG.getConstant(1, DL, ShVT);
8352 if (IsFSHL) {
8353 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8354 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8355 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8356 } else {
8357 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8358 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8359 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8360 }
8361 }
8362 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8363}
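//
// Note on the "Y >> 1 >> (BW - 1 - (Z % BW))" form used above: both shift
// amounts stay within [0, BW - 1], so no single shift ever equals BW (which
// would be poison). E.g. for BW == 8, fshl(X, Y, 0) evaluates to
// (X << 0) | ((Y >> 1) >> 7) == X, as required.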
8364
8365// TODO: Merge with expandFunnelShift.
8366SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
8367 SelectionDAG &DAG) const {
8368 EVT VT = Node->getValueType(0);
8369 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8370 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8371 SDValue Op0 = Node->getOperand(0);
8372 SDValue Op1 = Node->getOperand(1);
8373 SDLoc DL(SDValue(Node, 0));
8374
8375 EVT ShVT = Op1.getValueType();
8376 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8377
8378 // If a rotate in the other direction is more supported, use it.
8379 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8380 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8381 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8382 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8383 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8384 }
8385
8386 if (!AllowVectorOps && VT.isVector() &&
8387 (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8388 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8389 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8390 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
8391 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
8392 return SDValue();
8393
8394 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8395 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8396 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8397 SDValue ShVal;
8398 SDValue HsVal;
8399 if (isPowerOf2_32(EltSizeInBits)) {
8400 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8401 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8402 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8403 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8404 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8405 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8406 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8407 } else {
8408 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8409 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8410 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8411 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8412 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8413 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8414 SDValue One = DAG.getConstant(1, DL, ShVT);
8415 HsVal =
8416 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8417 }
8418 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8419}
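//
// For illustration, with w == 8 the power-of-two path computes
//   rotl(x, 3) == (x << (3 & 7)) | (x >> (-3 & 7)) == (x << 3) | (x >> 5)
// and for c == 0 both paths reduce to x, since x >> 1 >> 7 == 0 in the
// non-power-of-two form.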
8420
8421SDValue TargetLowering::expandCLMUL(SDNode *Node, SelectionDAG &DAG) const {
8422 SDLoc DL(Node);
8423 EVT VT = Node->getValueType(0);
8424 SDValue X = Node->getOperand(0);
8425 SDValue Y = Node->getOperand(1);
8426 unsigned BW = VT.getScalarSizeInBits();
8427 unsigned Opcode = Node->getOpcode();
8428
8429 switch (Opcode) {
8430 case ISD::CLMUL: {
8431 // NOTE: If you change this expansion, please update the cost model
8432 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8433 // Intrinsic::clmul.
8434 SDValue Res = DAG.getConstant(0, DL, VT);
8435 for (unsigned I = 0; I < BW; ++I) {
8436 SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8437 SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8438 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8439 Res = DAG.getNode(ISD::XOR, DL, VT, Res, Mul);
8440 }
8441 return Res;
8442 }
8443 case ISD::CLMULR:
8444 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
8445 if (isOperationLegalOrCustom(ISD::CLMUL, VT) &&
8446 isOperationLegalOrCustom(ISD::CLMULH, VT)) {
8447 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
8448 SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
8449 Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
8450 DAG.getShiftAmountConstant(BW - 1, VT, DL));
8451 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8452 DAG.getShiftAmountConstant(1, VT, DL));
8453 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
8454 }
8455 [[fallthrough]];
8456 case ISD::CLMULH: {
8457 EVT ExtVT = VT.changeElementType(
8458 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), 2 * BW));
8459 // For example, ExtVT = i64 based operations aren't legal on a 32-bit
8460 // target; use bitreverse-based lowering in this case.
8461 if (!isTypeLegal(ExtVT) ||
8462 !isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8463 SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
8464 SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
8465 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
8466 SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
8467 if (Opcode == ISD::CLMULH)
8468 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8469 DAG.getShiftAmountConstant(1, VT, DL));
8470 return Res;
8471 }
8472 SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
8473 SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
8474 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8475 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8476 SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
8477 DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
8478 return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
8479 }
8480 }
8481 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8482}
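//
// For illustration, a carry-less multiply XORs a shifted copy of X for each
// set bit of Y, so for 4-bit values
//   clmul(0b0101, 0b0011) == (0b0101 << 0) ^ (0b0101 << 1)
//                         == 0b0101 ^ 0b1010 == 0b1111
// which is what the ISD::CLMUL loop above computes: MUL(X, Y & (1 << I))
// equals X << I whenever only bit I of Y is set.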
8483
8485 SelectionDAG &DAG) const {
8486 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8487 EVT VT = Node->getValueType(0);
8488 unsigned VTBits = VT.getScalarSizeInBits();
8489 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8490
8491 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8492 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8493 SDValue ShOpLo = Node->getOperand(0);
8494 SDValue ShOpHi = Node->getOperand(1);
8495 SDValue ShAmt = Node->getOperand(2);
8496 EVT ShAmtVT = ShAmt.getValueType();
8497 EVT ShAmtCCVT =
8498 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8499 SDLoc dl(Node);
8500
8501 // ISD::FSHL and ISD::FSHR have defined overflow behavior, but the ISD::SHL,
8502 // ISD::SRA and ISD::SRL nodes do not. Insert an AND to be safe; it's usually
8503 // optimized away during isel.
8504 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8505 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8506 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8507 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8508 : DAG.getConstant(0, dl, VT);
8509
8510 SDValue Tmp2, Tmp3;
8511 if (IsSHL) {
8512 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8513 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8514 } else {
8515 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8516 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8517 }
8518
8519 // If the shift amount is larger than or equal to the width of a part, we
8520 // don't use the result from the FSHL/FSHR. Insert a test and select the
8521 // appropriate values for large shift amounts.
8522 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8523 DAG.getConstant(VTBits, dl, ShAmtVT));
8524 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8525 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8526
8527 if (IsSHL) {
8528 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8529 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8530 } else {
8531 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8532 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8533 }
8534}
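//
// For illustration, lowering a 64-bit SHL_PARTS into 32-bit parts with a
// shift amount of 40: the amount ANDed with 32 is nonzero, so the large-shift
// operands are selected, giving Hi = Lo << (40 & 31) == Lo << 8 and Lo = 0,
// which matches a true 64-bit left shift by 40.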
8535
8536bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8537 SelectionDAG &DAG) const {
8538 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8539 SDValue Src = Node->getOperand(OpNo);
8540 EVT SrcVT = Src.getValueType();
8541 EVT DstVT = Node->getValueType(0);
8542 SDLoc dl(SDValue(Node, 0));
8543
8544 // FIXME: Only f32 to i64 conversions are supported.
8545 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8546 return false;
8547
8548 if (Node->isStrictFPOpcode())
8549 // When a NaN is converted to an integer a trap is allowed. We can't
8550 // use this expansion here because it would eliminate that trap. Other
8551 // traps are also allowed and cannot be eliminated. See
8552 // IEEE 754-2008 sec 5.8.
8553 return false;
8554
8555 // Expand f32 -> i64 conversion
8556 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8557 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8558 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8559 EVT IntVT = SrcVT.changeTypeToInteger();
8560 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8561
8562 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8563 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8564 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8565 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8566 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8567 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8568
8569 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8570
8571 SDValue ExponentBits = DAG.getNode(
8572 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8573 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8574 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8575
8576 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8577 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8578 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8579 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8580
8581 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8582 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8583 DAG.getConstant(0x00800000, dl, IntVT));
8584
8585 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8586
8587 R = DAG.getSelectCC(
8588 dl, Exponent, ExponentLoBit,
8589 DAG.getNode(ISD::SHL, dl, DstVT, R,
8590 DAG.getZExtOrTrunc(
8591 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8592 dl, IntShVT)),
8593 DAG.getNode(ISD::SRL, dl, DstVT, R,
8594 DAG.getZExtOrTrunc(
8595 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8596 dl, IntShVT)),
8597 ISD::SETGT);
8598
8599 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8600 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8601
8602 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8603 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8604 return true;
8605}
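//
// For illustration, converting 1.0f (bits 0x3F800000) with the sequence
// above: ExponentBits == 127, so Exponent == 0; R == 0x00800000 once the
// implicit bit is ORed in; and since Exponent <= 23 the mantissa is shifted
// right by 23 - 0, yielding 1. The sign is zero, so the final result is 1.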
8606
8607bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8608 SDValue &Chain,
8609 SelectionDAG &DAG) const {
8610 SDLoc dl(SDValue(Node, 0));
8611 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8612 SDValue Src = Node->getOperand(OpNo);
8613
8614 EVT SrcVT = Src.getValueType();
8615 EVT DstVT = Node->getValueType(0);
8616 EVT SetCCVT =
8617 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8618 EVT DstSetCCVT =
8619 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8620
8621 // Only expand vector types if we have the appropriate vector bit operations.
8622 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8623 ISD::FP_TO_SINT;
8624 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8625 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
8626 return false;
8627
8628 // If the maximum float value is smaller than the signed integer range,
8629 // the destination signmask can't be represented by the float, so we can
8630 // just use FP_TO_SINT directly.
8631 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8632 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8633 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8634 if (APFloat::opOverflow &
8635 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8636 if (Node->isStrictFPOpcode()) {
8637 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8638 { Node->getOperand(0), Src });
8639 Chain = Result.getValue(1);
8640 } else
8641 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8642 return true;
8643 }
8644
8645 // Don't expand it if there isn't a cheap fsub instruction.
8646 if (!isOperationLegalOrCustom(
8647 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8648 return false;
8649
8650 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8651 SDValue Sel;
8652
8653 if (Node->isStrictFPOpcode()) {
8654 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8655 Node->getOperand(0), /*IsSignaling*/ true);
8656 Chain = Sel.getValue(1);
8657 } else {
8658 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8659 }
8660
8661 bool Strict = Node->isStrictFPOpcode() ||
8662 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8663
8664 if (Strict) {
8665 // Expand based on the maximum range of FP_TO_SINT; if the value exceeds the
8666 // signmask, then offset (the result of which should be fully representable).
8667 // Sel = Src < 0x8000000000000000
8668 // FltOfs = select Sel, 0, 0x8000000000000000
8669 // IntOfs = select Sel, 0, 0x8000000000000000
8670 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8671
8672 // TODO: Should any fast-math-flags be set for the FSUB?
8673 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8674 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8675 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8676 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8677 DAG.getConstant(0, dl, DstVT),
8678 DAG.getConstant(SignMask, dl, DstVT));
8679 SDValue SInt;
8680 if (Node->isStrictFPOpcode()) {
8681 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8682 { Chain, Src, FltOfs });
8683 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8684 { Val.getValue(1), Val });
8685 Chain = SInt.getValue(1);
8686 } else {
8687 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8688 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8689 }
8690 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8691 } else {
8692 // Expand based on maximum range of FP_TO_SINT:
8693 // True = fp_to_sint(Src)
8694 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8695 // Result = select (Src < 0x8000000000000000), True, False
8696
8697 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8698 // TODO: Should any fast-math-flags be set for the FSUB?
8699 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8700 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8701 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8702 DAG.getConstant(SignMask, dl, DstVT));
8703 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8704 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8705 }
8706 return true;
8707}
8708
8709bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8710 SDValue &Chain, SelectionDAG &DAG) const {
8711 // This transform is not correct for converting 0 when the rounding mode is
8712 // set to round toward negative infinity, which will produce -0.0. So disable
8713 // it under strictfp.
8714 if (Node->isStrictFPOpcode())
8715 return false;
8716
8717 SDValue Src = Node->getOperand(0);
8718 EVT SrcVT = Src.getValueType();
8719 EVT DstVT = Node->getValueType(0);
8720
8721 // If the input is known to be non-negative and SINT_TO_FP is legal then use
8722 // it.
8723 if (Node->getFlags().hasNonNeg() &&
8724 isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
8725 Result =
8726 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
8727 return true;
8728 }
8729
8730 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8731 return false;
8732
8733 // Only expand vector types if we have the appropriate vector bit
8734 // operations.
8735 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8736 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8737 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8738 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8739 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8740 return false;
8741
8742 SDLoc dl(SDValue(Node, 0));
8743
8744 // Implementation of unsigned i64 to f64 following the algorithm in
8745 // __floatundidf in compiler_rt. This implementation performs rounding
8746 // correctly in all rounding modes with the exception of converting 0
8747 // when rounding toward negative infinity. In that case the fsub will
8748 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
8749 // incorrect.
8750 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8751 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8752 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8753 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8754 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8755 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
8756
8757 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8758 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8759 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8760 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8761 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8762 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8763 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8764 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8765 return true;
8766}
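//
// For illustration of the magic constants above: Lo | 0x4330000000000000
// bitcast to f64 is exactly 2^52 + Lo, and Hi | 0x4530000000000000 is
// exactly 2^84 + Hi * 2^32. Subtracting (2^84 + 2^52) from the latter is
// exact and yields Hi * 2^32 - 2^52, so the final FADD computes
//   (2^52 + Lo) + (Hi * 2^32 - 2^52) == Hi * 2^32 + Lo
// with a single rounding at the end.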
8767
8768SDValue
8769TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8770 SelectionDAG &DAG) const {
8771 unsigned Opcode = Node->getOpcode();
8772 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8773 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8774 "Wrong opcode");
8775
8776 if (Node->getFlags().hasNoNaNs()) {
8777 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8778 EVT VT = Node->getValueType(0);
8779 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8780 !isOperationLegalOrCustom(ISD::SELECT_CC, VT)) &&
8781 VT.isVector())
8782 return SDValue();
8783 SDValue Op1 = Node->getOperand(0);
8784 SDValue Op2 = Node->getOperand(1);
8785 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
8786 Node->getFlags());
8787 }
8788
8789 return SDValue();
8790}
8791
8792SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8793 SelectionDAG &DAG) const {
8794 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
8795 return Expanded;
8796
8797 EVT VT = Node->getValueType(0);
8798 if (VT.isScalableVector())
8799 report_fatal_error(
8800 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8801
8802 SDLoc dl(Node);
8803 unsigned NewOp =
8804 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8805
8806 if (isOperationLegalOrCustom(NewOp, VT)) {
8807 SDValue Quiet0 = Node->getOperand(0);
8808 SDValue Quiet1 = Node->getOperand(1);
8809
8810 if (!Node->getFlags().hasNoNaNs()) {
8811 // Insert canonicalizes if it's possible we need to quiet to get correct
8812 // sNaN behavior.
8813 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8814 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8815 Node->getFlags());
8816 }
8817 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8818 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8819 Node->getFlags());
8820 }
8821 }
8822
8823 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8824 }
8825
8826 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8827 // instead if there are no NaNs.
8828 if (Node->getFlags().hasNoNaNs() ||
8829 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8830 DAG.isKnownNeverNaN(Node->getOperand(1)))) {
8831 unsigned IEEE2018Op =
8832 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8833 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8834 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8835 Node->getOperand(1), Node->getFlags());
8836 }
8837
8838 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8839 return SelCC;
8840
8841 return SDValue();
8842}
8843
8844SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
8845 SelectionDAG &DAG) const {
8846 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
8847 return Expanded;
8848
8849 SDLoc DL(N);
8850 SDValue LHS = N->getOperand(0);
8851 SDValue RHS = N->getOperand(1);
8852 unsigned Opc = N->getOpcode();
8853 EVT VT = N->getValueType(0);
8854 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8855 bool IsMax = Opc == ISD::FMAXIMUM;
8856 SDNodeFlags Flags = N->getFlags();
8857
8858 // First, implement the comparison without NaN propagation. If no native
8859 // fmin or fmax is available, use a plain select with setcc instead.
8860 SDValue MinMax;
8861 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8862 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8863
8864 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8865 // signed zero behavior.
8866 bool MinMaxMustRespectOrderedZero = false;
8867
8868 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8869 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
8870 MinMaxMustRespectOrderedZero = true;
8871 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8872 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
8873 } else {
8874 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
8875 return DAG.UnrollVectorOp(N);
8876
8877 // NaN (if present) will be propagated later, so orderedness doesn't matter.
8878 SDValue Compare =
8879 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
8880 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
8881 }
8882
8883 // Propagate NaN if either operand is NaN.
8884 if (!N->getFlags().hasNoNaNs() &&
8885 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
8886 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
8887 APFloat::getNaN(VT.getFltSemantics()));
8888 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8889 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
8890 }
8891
8892 // fminimum/fmaximum requires -0.0 less than +0.0
8893 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8894 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8895 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8896 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8897 SDValue TestZero =
8898 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8899 SDValue LCmp = DAG.getSelect(
8900 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8901 MinMax, Flags);
8902 SDValue RCmp = DAG.getSelect(
8903 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8904 LCmp, Flags);
8905 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8906 }
8907
8908 return MinMax;
8909}
8910
8911SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
8912 SelectionDAG &DAG) const {
8913 SDLoc DL(Node);
8914 SDValue LHS = Node->getOperand(0);
8915 SDValue RHS = Node->getOperand(1);
8916 unsigned Opc = Node->getOpcode();
8917 EVT VT = Node->getValueType(0);
8918 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8919 bool IsMax = Opc == ISD::FMAXIMUMNUM;
8920 SDNodeFlags Flags = Node->getFlags();
8921
8922 unsigned NewOp =
8923 Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8924
8925 if (isOperationLegalOrCustom(NewOp, VT)) {
8926 if (!Flags.hasNoNaNs()) {
8927 // Insert canonicalizes if it's possible we need to quiet to get correct
8928 // sNaN behavior.
8929 if (!DAG.isKnownNeverSNaN(LHS)) {
8930 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
8931 }
8932 if (!DAG.isKnownNeverSNaN(RHS)) {
8933 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
8934 }
8935 }
8936
8937 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
8938 }
8939
8940 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has the same
8941 // behavior in all other cases, +0.0 vs -0.0 included.
8942 if (Flags.hasNoNaNs() ||
8943 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
8944 unsigned IEEE2019Op =
8945 Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8946 if (isOperationLegalOrCustom(IEEE2019Op, VT))
8947 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
8948 }
8949
8950 // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
8951 // either one for +0.0 vs -0.0.
8952 if ((Flags.hasNoNaNs() ||
8953 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
8954 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8955 DAG.isKnownNeverZeroFloat(RHS))) {
8956 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
8957 if (isOperationLegalOrCustom(IEEE2008Op, VT))
8958 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
8959 }
8960
8961 if (VT.isVector() &&
8962 (!isOperationLegalOrCustom(ISD::VSELECT, VT) ||
8963 !isOperationLegalOrCustom(ISD::IS_FPCLASS, VT)))
8964 return DAG.UnrollVectorOp(Node);
8965
8966 // If only one operand is NaN, override it with the other operand.
8967 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
8968 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
8969 }
8970 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
8971 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
8972 }
8973
8974 // Always prefer RHS if equal.
8975 SDValue MinMax =
8976 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8977
8978 // TODO: We need to quiet sNaN under strictfp.
8979
8980 // Fixup signed zero behavior.
8981 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
8982 DAG.isKnownNeverZeroFloat(RHS)) {
8983 return MinMax;
8984 }
8985 SDValue TestZero =
8986 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8987 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8988 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8989 EVT IntVT = VT.changeTypeToInteger();
8990 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
8991 SDValue LHSTrunc = LHS;
8993 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
8994 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8995 }
8996 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
8997 // we preferred RHS when generating MinMax if the operands are equal.
8998 SDValue RetZero = DAG.getSelect(
8999 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
9000 MinMax, Flags);
9001 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
9002}
9003
9004/// Returns a true value if this FPClassTest can be performed with an ordered
9005/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
9006/// std::nullopt if it cannot be performed as a compare with 0.
9007static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9008 const fltSemantics &Semantics,
9009 const MachineFunction &MF) {
9010 FPClassTest OrderedMask = Test & ~fcNan;
9011 FPClassTest NanTest = Test & fcNan;
9012 bool IsOrdered = NanTest == fcNone;
9013 bool IsUnordered = NanTest == fcNan;
9014
9015 // Skip cases that are testing for only a qnan or snan.
9016 if (!IsOrdered && !IsUnordered)
9017 return std::nullopt;
9018
9019 if (OrderedMask == fcZero &&
9020 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
9021 return IsOrdered;
9022 if (OrderedMask == (fcZero | fcSubnormal) &&
9023 MF.getDenormalMode(Semantics).inputsAreZero())
9024 return IsOrdered;
9025 return std::nullopt;
9026}
9027
9028SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
9029 const FPClassTest OrigTestMask,
9030 SDNodeFlags Flags, const SDLoc &DL,
9031 SelectionDAG &DAG) const {
9032 EVT OperandVT = Op.getValueType();
9033 assert(OperandVT.isFloatingPoint());
9034 FPClassTest Test = OrigTestMask;
9035
9036 // Degenerate cases.
9037 if (Test == fcNone)
9038 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
9039 if (Test == fcAllFlags)
9040 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
9041
9042 // PPC double double is a pair of doubles, of which the higher part determines
9043 // the value class.
9044 if (OperandVT == MVT::ppcf128) {
9045 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
9046 DAG.getConstant(1, DL, MVT::i32));
9047 OperandVT = MVT::f64;
9048 }
9049
9050 // Floating-point type properties.
9051 EVT ScalarFloatVT = OperandVT.getScalarType();
9052 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
9053 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
9054 bool IsF80 = (ScalarFloatVT == MVT::f80);
9055
9056 // Some checks can be implemented using float comparisons, if floating point
9057 // exceptions are ignored.
9058 if (Flags.hasNoFPExcept() &&
9059 isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
9060 FPClassTest FPTestMask = Test;
9061 bool IsInvertedFP = false;
9062
9063 if (FPClassTest InvertedFPCheck =
9064 invertFPClassTestIfSimpler(FPTestMask, true)) {
9065 FPTestMask = InvertedFPCheck;
9066 IsInvertedFP = true;
9067 }
9068
9069 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9070 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9071
9072 // See if we can fold an | fcNan into an unordered compare.
9073 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9074
9075 // Can't fold the ordered check if we're only testing for snan or qnan
9076 // individually.
9077 if ((FPTestMask & fcNan) != fcNan)
9078 OrderedFPTestMask = FPTestMask;
9079
9080 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9081
9082 if (std::optional<bool> IsCmp0 =
9083 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
9084 IsCmp0 && (isCondCodeLegalOrCustom(
9085 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9086 OperandVT.getScalarType().getSimpleVT()))) {
9087
9088 // If denormals could be implicitly treated as 0, this is not equivalent
9089 // to a compare with 0 since it will also be true for denormals.
9090 return DAG.getSetCC(DL, ResultVT, Op,
9091 DAG.getConstantFP(0.0, DL, OperandVT),
9092 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9093 }
9094
9095 if (FPTestMask == fcNan &&
9096 isCondCodeLegalOrCustom(IsInvertedFP ? ISD::SETO : ISD::SETUO,
9097 OperandVT.getScalarType().getSimpleVT()))
9098 return DAG.getSetCC(DL, ResultVT, Op, Op,
9099 IsInvertedFP ? ISD::SETO : ISD::SETUO);
9100
9101 bool IsOrderedInf = FPTestMask == fcInf;
9102 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9103 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
9104 : UnorderedCmpOpcode,
9105 OperandVT.getScalarType().getSimpleVT()) &&
9106 (isOperationLegal(ISD::FABS,
9107 OperandVT.getScalarType()) ||
9108 (OperandVT.isVector() &&
9109 isOperationLegalOrCustom(ISD::FABS, OperandVT)))) {
9110 // isinf(x) --> fabs(x) == inf
9111 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9112 SDValue Inf =
9113 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9114 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
9115 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9116 }
9117
9118 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9119 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
9120 : UnorderedCmpOpcode,
9121 OperandVT.getSimpleVT())) {
9122 // isposinf(x) --> x == inf
9123 // isneginf(x) --> x == -inf
9124 // isposinf(x) || nan --> x u== inf
9125 // isneginf(x) || nan --> x u== -inf
9126
9127 SDValue Inf = DAG.getConstantFP(
9128 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9129 OperandVT);
9130 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9131 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9132 }
9133
9134 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9135 // TODO: Could handle ordered case, but it produces worse code for
9136 // x86. Maybe handle ordered if fabs is free?
9137
9138 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9139 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9140
9141 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9142 OperandVT.getScalarType().getSimpleVT())) {
9143 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9144
9145 // TODO: Maybe only makes sense if fabs is free. Integer test of
9146 // exponent bits seems better for x86.
9147 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9148 SDValue SmallestNormal = DAG.getConstantFP(
9149 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9150 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9151 IsOrdered ? OrderedOp : UnorderedOp);
9152 }
9153 }
9154
9155 if (FPTestMask == fcNormal) {
9156 // TODO: Handle unordered
9157 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9158 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9159
9160 if (isCondCodeLegalOrCustom(IsFiniteOp,
9161 OperandVT.getScalarType().getSimpleVT()) &&
9162 isCondCodeLegalOrCustom(IsNormalOp,
9163 OperandVT.getScalarType().getSimpleVT()) &&
9164 isFAbsFree(OperandVT)) {
9165 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9166 SDValue Inf =
9167 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9168 SDValue SmallestNormal = DAG.getConstantFP(
9169 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9170
9171 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9172 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9173 SDValue IsNormal =
9174 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9175 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9176 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9177 }
9178 }
9179 }
9180
9181 // Some checks may be represented as the inversion of a simpler check, for
9182 // example "inf|normal|subnormal|zero" => !"nan".
9182 // "inf|normal|subnormal|zero" => !"nan".
9183 bool IsInverted = false;
9184
9185 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9186 Test = InvertedCheck;
9187 IsInverted = true;
9188 }
9189
9190 // In the general case use integer operations.
9191 unsigned BitSize = OperandVT.getScalarSizeInBits();
9192 EVT IntVT = OperandVT.changeElementType(
9193 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
9194 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9195
9196 // Various masks.
9197 APInt SignBit = APInt::getSignMask(BitSize);
9198 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9199 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9200 const unsigned ExplicitIntBitInF80 = 63;
9201 APInt ExpMask = Inf;
9202 if (IsF80)
9203 ExpMask.clearBit(ExplicitIntBitInF80);
9204 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9205 APInt QNaNBitMask =
9206 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9207 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9208
9209 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9210 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9211 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9212 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9213 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9214 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9215
9216 SDValue Res;
9217 const auto appendResult = [&](SDValue PartialRes) {
9218 if (PartialRes) {
9219 if (Res)
9220 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9221 else
9222 Res = PartialRes;
9223 }
9224 };
9225
9226 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9227 const auto getIntBitIsSet = [&]() -> SDValue {
9228 if (!IntBitIsSetV) {
9229 APInt IntBitMask(BitSize, 0);
9230 IntBitMask.setBit(ExplicitIntBitInF80);
9231 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9232 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9233 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9234 }
9235 return IntBitIsSetV;
9236 };
9237
9238 // Split the value into sign bit and absolute value.
9239 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9240 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9241 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9242
9243 // Tests that involve more than one class should be processed first.
9244 SDValue PartialRes;
9245
9246 if (IsF80)
9247 ; // Detect finite numbers of f80 by checking individual classes because
9248 // they have different settings of the explicit integer bit.
9249 else if ((Test & fcFinite) == fcFinite) {
9250 // finite(V) ==> abs(V) < exp_mask
9251 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9252 Test &= ~fcFinite;
9253 } else if ((Test & fcFinite) == fcPosFinite) {
9254 // finite(V) && V > 0 ==> V < exp_mask
9255 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9256 Test &= ~fcPosFinite;
9257 } else if ((Test & fcFinite) == fcNegFinite) {
9258 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9259 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9260 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9261 Test &= ~fcNegFinite;
9262 }
9263 appendResult(PartialRes);
9264
9265 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9266 // fcZero | fcSubnormal => test all exponent bits are 0
9267 // TODO: Handle sign bit specific cases
9268 if (PartialCheck == (fcZero | fcSubnormal)) {
9269 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9270 SDValue ExpIsZero =
9271 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9272 appendResult(ExpIsZero);
9273 Test &= ~PartialCheck & fcAllFlags;
9274 }
9275 }
9276
9277 // Check for individual classes.
9278
9279 if (unsigned PartialCheck = Test & fcZero) {
9280 if (PartialCheck == fcPosZero)
9281 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
9282 else if (PartialCheck == fcZero)
9283 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9284 else // ISD::fcNegZero
9285 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9286 appendResult(PartialRes);
9287 }
9288
9289 if (unsigned PartialCheck = Test & fcSubnormal) {
9290 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9291 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9292 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9293 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9294 SDValue VMinusOneV =
9295 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9296 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9297 if (PartialCheck == fcNegSubnormal)
9298 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9299 appendResult(PartialRes);
9300 }
9301
9302 if (unsigned PartialCheck = Test & fcInf) {
9303 if (PartialCheck == fcPosInf)
9304 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9305 else if (PartialCheck == fcInf)
9306 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9307 else { // ISD::fcNegInf
9308 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9309 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9310 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9311 }
9312 appendResult(PartialRes);
9313 }
9314
9315 if (unsigned PartialCheck = Test & fcNan) {
9316 APInt InfWithQnanBit = Inf | QNaNBitMask;
9317 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9318 if (PartialCheck == fcNan) {
9319 // isnan(V) ==> abs(V) > int(inf)
9320 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9321 if (IsF80) {
9322 // Recognize unsupported values as NaNs for compatibility with glibc.
9323 // For such values, (exp(V) == 0) == int_bit.
9324 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9325 SDValue ExpIsZero =
9326 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9327 SDValue IsPseudo =
9328 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9329 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9330 }
9331 } else if (PartialCheck == fcQNan) {
9332 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9333 PartialRes =
9334 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9335 } else { // ISD::fcSNan
9336 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9337 // abs(V) < (unsigned(Inf) | quiet_bit)
9338 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9339 SDValue IsNotQnan =
9340 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9341 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9342 }
9343 appendResult(PartialRes);
9344 }
9345
9346 if (unsigned PartialCheck = Test & fcNormal) {
9347 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9348 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9349 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9350 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9351 APInt ExpLimit = ExpMask - ExpLSB;
9352 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9353 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9354 if (PartialCheck == fcNegNormal)
9355 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9356 else if (PartialCheck == fcPosNormal) {
9357 SDValue PosSignV =
9358 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9359 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9360 }
9361 if (IsF80)
9362 PartialRes =
9363 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9364 appendResult(PartialRes);
9365 }
9366
9367 if (!Res)
9368 return DAG.getConstant(IsInverted, DL, ResultVT);
9369 if (IsInverted)
9370 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9371 return Res;
9372}
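//
// For illustration, with f32 the integer path above reduces to the classic
// bit tests (AbsV == bits & 0x7fffffff, Inf == 0x7f800000):
//   isnan(V)       <=> AbsV > 0x7f800000
//   isinf(V)       <=> AbsV == 0x7f800000
//   issubnormal(V) <=> AbsV - 1 < 0x007fffff (unsigned; the wrap at
//                      AbsV == 0 correctly excludes zero)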
9373
9374// Only expand vector types if we have the appropriate vector bit operations.
9375static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9376 assert(VT.isVector() && "Expected vector type");
9377 unsigned Len = VT.getScalarSizeInBits();
9378 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9379 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
9380 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
9381 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9382 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
9383}
9384
9385SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9386 SDLoc dl(Node);
9387 EVT VT = Node->getValueType(0);
9388 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9389 SDValue Op = Node->getOperand(0);
9390 unsigned Len = VT.getScalarSizeInBits();
9391 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9392
9393 // TODO: Add support for irregular type lengths.
9394 if (!(Len <= 128 && Len % 8 == 0))
9395 return SDValue();
9396
9397 // Only expand vector types if we have the appropriate vector bit operations.
9398 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9399 return SDValue();
9400
9401 // This is the "best" algorithm from
9402 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9403 SDValue Mask55 =
9404 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9405 SDValue Mask33 =
9406 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9407 SDValue Mask0F =
9408 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9409
9410 // v = v - ((v >> 1) & 0x55555555...)
9411 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9412 DAG.getNode(ISD::AND, dl, VT,
9413 DAG.getNode(ISD::SRL, dl, VT, Op,
9414 DAG.getConstant(1, dl, ShVT)),
9415 Mask55));
9416 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9417 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9418 DAG.getNode(ISD::AND, dl, VT,
9419 DAG.getNode(ISD::SRL, dl, VT, Op,
9420 DAG.getConstant(2, dl, ShVT)),
9421 Mask33));
9422 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9423 Op = DAG.getNode(ISD::AND, dl, VT,
9424 DAG.getNode(ISD::ADD, dl, VT, Op,
9425 DAG.getNode(ISD::SRL, dl, VT, Op,
9426 DAG.getConstant(4, dl, ShVT))),
9427 Mask0F);
9428
9429 if (Len <= 8)
9430 return Op;
9431
9432 // Avoid the multiply if we only have 2 bytes to add.
9433 // TODO: Only doing this for scalars because vectors weren't as obviously
9434 // improved.
9435 if (Len == 16 && !VT.isVector()) {
9436 // v = (v + (v >> 8)) & 0x00FF;
9437 return DAG.getNode(ISD::AND, dl, VT,
9438 DAG.getNode(ISD::ADD, dl, VT, Op,
9439 DAG.getNode(ISD::SRL, dl, VT, Op,
9440 DAG.getConstant(8, dl, ShVT))),
9441 DAG.getConstant(0xFF, dl, VT));
9442 }
9443
9444 // v = (v * 0x01010101...) >> (Len - 8)
9445 SDValue V;
9446 if (isOperationLegalOrCustomOrPromote(
9447 ISD::MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9448 SDValue Mask01 =
9449 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9450 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9451 } else {
9452 V = Op;
9453 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9454 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9455 V = DAG.getNode(ISD::ADD, dl, VT, V,
9456 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9457 }
9458 }
9459 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9460}
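// Illustrative walkthrough of the expansion above for an 8-bit
// v = 0b11010110 (five set bits):
//   v - ((v >> 1) & 0x55)          -> 0b10010101 (per-pair counts 2,1,1,1)
//   (v & 0x33) + ((v >> 2) & 0x33) -> 0b00110010 (per-nibble counts 3,2)
//   (v + (v >> 4)) & 0x0F          -> 0b00000101 = 5
// For wider types the MUL by 0x0101... (or the shift/add loop) then sums the
// per-byte counts into the top byte, which the final SRL extracts.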
9461
9462SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
9463 SDLoc dl(Node);
9464 EVT VT = Node->getValueType(0);
9465 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9466 SDValue Op = Node->getOperand(0);
9467 SDValue Mask = Node->getOperand(1);
9468 SDValue VL = Node->getOperand(2);
9469 unsigned Len = VT.getScalarSizeInBits();
9470 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9471
9472 // TODO: Add support for irregular type lengths.
9473 if (!(Len <= 128 && Len % 8 == 0))
9474 return SDValue();
9475
9476 // This is the same algorithm as in expandCTPOP, from
9477 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9478 SDValue Mask55 =
9479 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9480 SDValue Mask33 =
9481 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9482 SDValue Mask0F =
9483 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9484
9485 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9486
9487 // v = v - ((v >> 1) & 0x55555555...)
9488 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9489 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9490 DAG.getConstant(1, dl, ShVT), Mask, VL),
9491 Mask55, Mask, VL);
9492 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9493
9494 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9495 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9496 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9497 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9498 DAG.getConstant(2, dl, ShVT), Mask, VL),
9499 Mask33, Mask, VL);
9500 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9501
9502 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9503 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9504 Mask, VL),
9505 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9506 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9507
9508 if (Len <= 8)
9509 return Op;
9510
9511 // v = (v * 0x01010101...) >> (Len - 8)
9512 SDValue V;
9513 if (isOperationLegalOrCustomOrPromote(
9514 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9515 SDValue Mask01 =
9516 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9517 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9518 } else {
9519 V = Op;
9520 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9521 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9522 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9523 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9524 Mask, VL);
9525 }
9526 }
9527 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9528 Mask, VL);
9529}
9530
9531SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9532 SDLoc dl(Node);
9533 EVT VT = Node->getValueType(0);
9534 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9535 SDValue Op = Node->getOperand(0);
9536 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9537
9538 // If the non-ZERO_UNDEF version is supported we can use that instead.
9539 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9540 isOperationLegalOrCustom(ISD::CTLZ, VT))
9541 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9542
9543 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9544 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
9545 EVT SetCCVT =
9546 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9547 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9548 SDValue Zero = DAG.getConstant(0, dl, VT);
9549 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9550 return DAG.getSelect(dl, VT, SrcIsZero,
9551 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9552 }
9553
9554 // Only expand vector types if we have the appropriate vector bit operations.
9555 // This includes the operations needed to expand CTPOP if it isn't supported.
9556 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9557 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9558 !canExpandVectorCTPOP(*this, VT)) ||
9559 !isOperationLegalOrCustom(ISD::SRL, VT) ||
9560 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
9561 return SDValue();
9562
9563 // for now, we do this:
9564 // x = x | (x >> 1);
9565 // x = x | (x >> 2);
9566 // ...
9567 // x = x | (x >>16);
9568 // x = x | (x >>32); // for 64-bit input
9569 // return popcount(~x);
9570 //
9571 // Ref: "Hacker's Delight" by Henry Warren
9572 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9573 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9574 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9575 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9576 }
9577 Op = DAG.getNOT(dl, Op, VT);
9578 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9579}
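// Illustrative trace of the expansion above for an 8-bit x = 0b00010000
// (ctlz == 3): the OR/SRL rounds smear the leading one to the right,
//   x |= x >> 1 -> 0b00011000
//   x |= x >> 2 -> 0b00011110
//   x |= x >> 4 -> 0b00011111
// so ~x = 0b11100000 and popcount(~x) = 3, the number of leading zeros.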
9580
9581SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
9582 SDLoc dl(Node);
9583 EVT VT = Node->getValueType(0);
9584 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9585 SDValue Op = Node->getOperand(0);
9586 SDValue Mask = Node->getOperand(1);
9587 SDValue VL = Node->getOperand(2);
9588 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9589
9590 // do this:
9591 // x = x | (x >> 1);
9592 // x = x | (x >> 2);
9593 // ...
9594 // x = x | (x >>16);
9595 // x = x | (x >>32); // for 64-bit input
9596 // return popcount(~x);
9597 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9598 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9599 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9600 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9601 VL);
9602 }
9603 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9604 Mask, VL);
9605 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9606}
9607
9608SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
9609 const SDLoc &DL, EVT VT, SDValue Op,
9610 unsigned BitWidth) const {
9611 if (BitWidth != 32 && BitWidth != 64)
9612 return SDValue();
9613 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9614 : APInt(64, 0x0218A392CD3D5DBFULL);
9615 const DataLayout &TD = DAG.getDataLayout();
9616 MachinePointerInfo PtrInfo =
9617 MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
9618 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9619 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9620 SDValue Lookup = DAG.getNode(
9621 ISD::SRL, DL, VT,
9622 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9623 DAG.getConstant(DeBruijn, DL, VT)),
9624 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
9625 Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
9626
9627 SmallVector<uint8_t> Table(BitWidth, 0);
9628 for (unsigned i = 0; i < BitWidth; i++) {
9629 APInt Shl = DeBruijn.shl(i);
9630 APInt Lshr = Shl.lshr(ShiftAmt);
9631 Table[Lshr.getZExtValue()] = i;
9632 }
9633
9634 // Create a ConstantArray in Constant Pool
9635 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9636 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9637 TD.getPrefTypeAlign(CA->getType()));
9638 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9639 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9640 PtrInfo, MVT::i8);
9641 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9642 return ExtLoad;
9643
9644 EVT SetCCVT =
9645 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9646 SDValue Zero = DAG.getConstant(0, DL, VT);
9647 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9648 return DAG.getSelect(DL, VT, SrcIsZero,
9649 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9650}
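// The table lookup works because Op & -Op isolates the lowest set bit as a
// power of two (1 << K), so the MUL reduces to DeBruijn << K, and the top
// Log2_32(BitWidth) bits of a de Bruijn sequence are distinct for every K.
// For instance, with the 32-bit constant and Op = 8 (K = 3):
//   (0x077CB531 << 3) >> 27 == 7, and Table[7] was initialized to 3.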
9651
9652SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9653 SDLoc dl(Node);
9654 EVT VT = Node->getValueType(0);
9655 SDValue Op = Node->getOperand(0);
9656 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9657
9658 // If the non-ZERO_UNDEF version is supported we can use that instead.
9659 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9660 isOperationLegalOrCustom(ISD::CTTZ, VT))
9661 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9662
9663 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9664 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
9665 EVT SetCCVT =
9666 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9667 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9668 SDValue Zero = DAG.getConstant(0, dl, VT);
9669 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9670 return DAG.getSelect(dl, VT, SrcIsZero,
9671 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9672 }
9673
9674 // Only expand vector types if we have the appropriate vector bit operations.
9675 // This includes the operations needed to expand CTPOP if it isn't supported.
9676 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9677 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
9678 !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
9679 !canExpandVectorCTPOP(*this, VT)) ||
9680 !isOperationLegalOrCustom(ISD::SUB, VT) ||
9681 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
9682 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9683 return SDValue();
9684
9685 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9686 // to be expanded or converted to a libcall.
9687 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9688 !isOperationLegal(ISD::CTLZ, VT))
9689 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9690 return V;
9691
9692 // for now, we use: { return popcount(~x & (x - 1)); }
9693 // unless the target has ctlz but not ctpop, in which case we use:
9694 // { return 32 - nlz(~x & (x-1)); }
9695 // Ref: "Hacker's Delight" by Henry Warren
9696 SDValue Tmp = DAG.getNode(
9697 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9698 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9699
9700 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9701 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
9702 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9703 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9704 }
9705
9706 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9707}
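// Illustrative check of the fallback identity for i8 x = 0b00010100
// (cttz == 2): x - 1 = 0b00010011, ~x = 0b11101011, and
// ~x & (x - 1) = 0b00000011 has exactly cttz(x) set bits, so either CTPOP
// or BitWidth - CTLZ of it yields 2.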
9708
9709SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
9710 SDValue Op = Node->getOperand(0);
9711 SDValue Mask = Node->getOperand(1);
9712 SDValue VL = Node->getOperand(2);
9713 SDLoc dl(Node);
9714 EVT VT = Node->getValueType(0);
9715
9716 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9717 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9718 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9719 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9720 DAG.getConstant(1, dl, VT), Mask, VL);
9721 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9722 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9723}
9724
9725SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
9726 SelectionDAG &DAG) const {
9727 // %cond = to_bool_vec %source
9728 // %splat = splat /*val=*/VL
9729 // %tz = step_vector
9730 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9731 // %r = vp.reduce.umin %v
9732 SDLoc DL(N);
9733 SDValue Source = N->getOperand(0);
9734 SDValue Mask = N->getOperand(1);
9735 SDValue EVL = N->getOperand(2);
9736 EVT SrcVT = Source.getValueType();
9737 EVT ResVT = N->getValueType(0);
9738 EVT ResVecVT =
9739 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9740
9741 // Convert to boolean vector.
9742 if (SrcVT.getScalarType() != MVT::i1) {
9743 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9744 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9745 SrcVT.getVectorElementCount());
9746 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9747 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9748 }
9749
9750 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9751 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9752 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9753 SDValue Select =
9754 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9755 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9756}
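// In effect this finds the index of the first set lane: active lanes keep
// their step_vector index, inactive ones are replaced by EVL, and the UMIN
// reduction picks the smallest. E.g. a boolean source <0,0,1,1> with EVL = 4
// selects <4,4,2,3>, whose minimum is 2; an all-zero source yields EVL.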
9757
9758SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
9759 SelectionDAG &DAG) const {
9760 SDLoc DL(N);
9761 SDValue Mask = N->getOperand(0);
9762 EVT MaskVT = Mask.getValueType();
9763 EVT BoolVT = MaskVT.getScalarType();
9764
9765 // Find a suitable type for a stepvector.
9766 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9767 if (MaskVT.isScalableVector())
9768 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9769 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9770 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
9771 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9772 /*ZeroIsPoison=*/true, &VScaleRange);
9773 // If the step vector element type is smaller than the mask element type,
9774 // use the mask type directly to avoid widening issues.
9775 EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
9776 EVT StepVT = MVT::getIntegerVT(EltWidth);
9777 EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
9778
9779 // If promotion or widening is required to make the type legal, do it here.
9780 // Promotion of integers within LegalizeVectorOps is looking for types of
9781 // the same size but with a smaller number of larger elements, not the usual
9782 // larger size with the same number of larger elements.
9783 TargetLowering::LegalizeTypeAction TypeAction =
9784 TLI.getTypeAction(StepVecVT.getSimpleVT());
9785 SDValue StepVec;
9786 if (TypeAction == TargetLowering::TypePromoteInteger) {
9787 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9788 StepVT = StepVecVT.getVectorElementType();
9789 StepVec = DAG.getStepVector(DL, StepVecVT);
9790 } else if (TypeAction == TargetLowering::TypeWidenVector) {
9791 // For widening, the element count changes. Create a step vector with only
9792 // the original elements valid and zeros for padding. Also widen the mask.
9793 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9794 unsigned WideNumElts = WideVecVT.getVectorNumElements();
9795
9796 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
9797 SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
9798 SDValue UndefStep = DAG.getPOISON(WideVecVT);
9799 StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
9800
9801 // Widen mask: pad with zeros.
9802 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
9803 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
9804 Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
9805
9806 StepVecVT = WideVecVT;
9807 StepVT = WideVecVT.getVectorElementType();
9808 } else {
9809 StepVec = DAG.getStepVector(DL, StepVecVT);
9810 }
9811
9812 // Zero out lanes with inactive elements, then find the highest remaining
9813 // value from the stepvector.
9814 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9815 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9816 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9817 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9818}
9819
9820SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
9821 bool IsNegative) const {
9822 SDLoc dl(N);
9823 EVT VT = N->getValueType(0);
9824 SDValue Op = N->getOperand(0);
9825
9826 // abs(x) -> smax(x,sub(0,x))
9827 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9828 isOperationLegal(ISD::SMAX, VT)) {
9829 SDValue Zero = DAG.getConstant(0, dl, VT);
9830 Op = DAG.getFreeze(Op);
9831 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9832 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9833 }
9834
9835 // abs(x) -> umin(x,sub(0,x))
9836 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9837 isOperationLegal(ISD::UMIN, VT)) {
9838 SDValue Zero = DAG.getConstant(0, dl, VT);
9839 Op = DAG.getFreeze(Op);
9840 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9841 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9842 }
9843
9844 // 0 - abs(x) -> smin(x, sub(0,x))
9845 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9846 isOperationLegal(ISD::SMIN, VT)) {
9847 SDValue Zero = DAG.getConstant(0, dl, VT);
9848 Op = DAG.getFreeze(Op);
9849 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9850 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9851 }
9852
9853 // Only expand vector types if we have the appropriate vector operations.
9854 if (VT.isVector() &&
9855 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
9856 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9857 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9858 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
9859 return SDValue();
9860
9861 Op = DAG.getFreeze(Op);
9862 SDValue Shift = DAG.getNode(
9863 ISD::SRA, dl, VT, Op,
9864 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9865 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9866
9867 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9868 if (!IsNegative)
9869 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9870
9871 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9872 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9873}
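// Illustrative check of the shift-based expansion for i8 x = -5 (0xFB):
//   Shift = x >> 7 (arithmetic) = 0xFF, Xor = x ^ 0xFF = 0x04,
//   Xor - Shift = 0x04 - 0xFF = 0x05 = abs(-5) (modulo 256).
// For non-negative x, Shift is zero and both the XOR and the SUB are no-ops.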
9874
9875SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9876 SDLoc dl(N);
9877 EVT VT = N->getValueType(0);
9878 SDValue LHS = N->getOperand(0);
9879 SDValue RHS = N->getOperand(1);
9880 bool IsSigned = N->getOpcode() == ISD::ABDS;
9881
9882 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9883 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9884 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9885 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9886 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9887 LHS = DAG.getFreeze(LHS);
9888 RHS = DAG.getFreeze(RHS);
9889 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9890 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9891 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9892 }
9893
9894 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9895 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
9896 LHS = DAG.getFreeze(LHS);
9897 RHS = DAG.getFreeze(RHS);
9898 return DAG.getNode(ISD::OR, dl, VT,
9899 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9900 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9901 }
9902
9903 // If the subtract doesn't overflow then just use abs(sub())
9904 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
9905
9906 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
9907 return DAG.getNode(ISD::ABS, dl, VT,
9908 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9909
9910 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
9911 return DAG.getNode(ISD::ABS, dl, VT,
9912 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9913
9914 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9915 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9916 LHS = DAG.getFreeze(LHS);
9917 RHS = DAG.getFreeze(RHS);
9918 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9919
9920 // Branchless expansion iff cmp result is allbits:
9921 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9922 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9923 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9924 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9925 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9926 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9927 }
9928
9929 // Similar to the branchless expansion, if we don't prefer selects, use the
9930 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
9931 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
9932 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
9933 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
9934 !preferSelectsOverBooleanArithmetic(VT)) {
9935 SDValue USubO =
9936 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9937 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9938 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9939 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9940 }
9941
9942 // FIXME: Should really try to split the vector in case it's legal on a
9943 // subvector.
9944 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9945 return DAG.UnrollVectorOp(N);
9946
9947 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9948 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9949 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9950 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9951}
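// Why the allbits branchless form above works: with Cmp = LHS > RHS ? -1 : 0
// and Diff = LHS - RHS, Cmp - (Diff ^ Cmp) is a conditional negation:
//   Cmp ==  0: 0 - Diff   = RHS - LHS (Diff is non-positive here)
//   Cmp == -1: -1 - ~Diff = Diff
// e.g. abdu(3, 7): Cmp = 0, Diff = -4, result = 4.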
9952
9953SDValue TargetLowering::expandAVG(SDNode *N, SelectionDAG &DAG) const {
9954 SDLoc dl(N);
9955 EVT VT = N->getValueType(0);
9956 SDValue LHS = N->getOperand(0);
9957 SDValue RHS = N->getOperand(1);
9958
9959 unsigned Opc = N->getOpcode();
9960 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9961 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9962 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9963 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9964 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9965 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9966 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9967 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9968 "Unknown AVG node");
9969
9970 // If the operands are already extended, we can add+shift.
9971 bool IsExt =
9972 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9973 DAG.ComputeNumSignBits(RHS) >= 2) ||
9974 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9975 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
9976 if (IsExt) {
9977 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9978 if (!IsFloor)
9979 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9980 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9981 DAG.getShiftAmountConstant(1, VT, dl));
9982 }
9983
9984 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9985 if (VT.isScalarInteger()) {
9986 unsigned BW = VT.getScalarSizeInBits();
9987 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9988 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9989 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9990 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9991 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9992 if (!IsFloor)
9993 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9994 DAG.getConstant(1, dl, ExtVT));
9995 // Just use SRL as we will be truncating away the extended sign bits.
9996 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9997 DAG.getShiftAmountConstant(1, ExtVT, dl));
9998 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9999 }
10000 }
10001
10002 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
10003 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
10004 SDValue UAddWithOverflow =
10005 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
10006
10007 SDValue Sum = UAddWithOverflow.getValue(0);
10008 SDValue Overflow = UAddWithOverflow.getValue(1);
10009
10010 // Right shift the sum by 1
10011 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
10012 DAG.getShiftAmountConstant(1, VT, dl));
10013
10014 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
10015 SDValue OverflowShl = DAG.getNode(
10016 ISD::SHL, dl, VT, ZeroExtOverflow,
10017 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10018
10019 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
10020 }
10021
10022 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10023 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10024 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10025 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
10026 LHS = DAG.getFreeze(LHS);
10027 RHS = DAG.getFreeze(RHS);
10028 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
10029 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10030 SDValue Shift =
10031 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10032 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
10033}
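// The final expansion relies on a + b == 2 * (a & b) + (a ^ b): the AND
// carries the common bits and the XOR the differing ones, so halving never
// loses the carry out. E.g. avgflooru(5, 10) = (5 & 10) + ((5 ^ 10) >> 1)
// = 0 + 7 = 7, and avgceilu(5, 10) = (5 | 10) - ((5 ^ 10) >> 1) = 15 - 7 = 8.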
10034
10035SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
10036 SDLoc dl(N);
10037 EVT VT = N->getValueType(0);
10038 SDValue Op = N->getOperand(0);
10039
10040 if (!VT.isSimple())
10041 return SDValue();
10042
10043 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10044 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10045 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10046 default:
10047 return SDValue();
10048 case MVT::i16:
10049 // Use a rotate by 8. This can be further expanded if necessary.
10050 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10051 case MVT::i32:
10052 // This is meant for ARM specifically, which has ROTR but no ROTL.
10053 if (isOperationLegalOrCustom(ISD::ROTR, VT)) {
10054 SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
10055 // (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF
10056 SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
10057 SDValue Rotr =
10058 DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
10059 SDValue Rotl =
10060 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10061 SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
10062 return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
10063 }
10064 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10065 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
10066 DAG.getConstant(0xFF00, dl, VT));
10067 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
10068 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10069 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
10070 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10071 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10072 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10073 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10074 case MVT::i64:
10075 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10076 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
10077 DAG.getConstant(255ULL<<8, dl, VT));
10078 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
10079 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
10080 DAG.getConstant(255ULL<<16, dl, VT));
10081 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
10082 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
10083 DAG.getConstant(255ULL<<24, dl, VT));
10084 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
10085 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10086 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
10087 DAG.getConstant(255ULL<<24, dl, VT));
10088 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10089 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
10090 DAG.getConstant(255ULL<<16, dl, VT));
10091 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
10092 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
10093 DAG.getConstant(255ULL<<8, dl, VT));
10094 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10095 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
10096 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
10097 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10098 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10099 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
10100 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10101 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
10102 }
10103}
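// Sanity check of the ROTR-based i32 path with Op = 0xAABBCCDD:
//   (Op & 0x00FF00FF) rotr 8  -> 0xDD00BB00
//   (Op rotr 24) & 0x00FF00FF -> 0x00CC00AA
// and their OR is 0xDDCCBBAA, i.e. the byte-swapped value, using two rotates
// instead of the four shift/mask pairs of the generic sequence.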
10104
10105SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
10106 SDLoc dl(N);
10107 EVT VT = N->getValueType(0);
10108 SDValue Op = N->getOperand(0);
10109 SDValue Mask = N->getOperand(1);
10110 SDValue EVL = N->getOperand(2);
10111
10112 if (!VT.isSimple())
10113 return SDValue();
10114
10115 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10116 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10117 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10118 default:
10119 return SDValue();
10120 case MVT::i16:
10121 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10122 Mask, EVL);
10123 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10124 Mask, EVL);
10125 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10126 case MVT::i32:
10127 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10128 Mask, EVL);
10129 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10130 Mask, EVL);
10131 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10132 Mask, EVL);
10133 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10134 Mask, EVL);
10135 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10136 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10137 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10138 Mask, EVL);
10139 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10140 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10141 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10142 case MVT::i64:
10143 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10144 Mask, EVL);
10145 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10146 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10147 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10148 Mask, EVL);
10149 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10150 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10151 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10152 Mask, EVL);
10153 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10154 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10155 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10156 Mask, EVL);
10157 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10158 Mask, EVL);
10159 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10160 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10161 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10162 Mask, EVL);
10163 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10164 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10165 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10166 Mask, EVL);
10167 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10168 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10169 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10170 Mask, EVL);
10171 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10172 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10173 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10174 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10175 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10176 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10177 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10178 }
10179}
10180
10181SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10182 SDLoc dl(N);
10183 EVT VT = N->getValueType(0);
10184 SDValue Op = N->getOperand(0);
10185 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10186 unsigned Sz = VT.getScalarSizeInBits();
10187
10188 SDValue Tmp, Tmp2, Tmp3;
10189
10190 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10191 // and finally the i1 pairs.
10192 // TODO: We can easily support i4/i2 legal types if any target ever does.
10193 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10194 // Create the masks - repeating the pattern every byte.
10195 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10196 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10197 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10198
10199 // BSWAP if the type is wider than a single byte.
10200 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10201
10202 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10203 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10204 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10205 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10206 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10207 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10208
10209 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10210 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10211 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10212 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10213 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10214 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10215
10216 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10217 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10218 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10219 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10220 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10221 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10222 return Tmp;
10223 }
10224
10225 Tmp = DAG.getConstant(0, dl, VT);
10226 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10227 if (I < J)
10228 Tmp2 =
10229 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10230 else
10231 Tmp2 =
10232 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10233
10234 APInt Shift = APInt::getOneBitSet(Sz, J);
10235 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10236 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10237 }
10238
10239 return Tmp;
10240}
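// Illustrative trace of the divide-and-conquer path for i8 Op = 0b00000001:
//   after the nibble swap: 0b00010000
//   after the pair swap:   0b01000000
//   after the bit swap:    0b10000000
// Each round halves the swap granularity; for wider types the leading BSWAP
// has already reversed whole bytes.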
10241
10242SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
10243 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10244
10245 SDLoc dl(N);
10246 EVT VT = N->getValueType(0);
10247 SDValue Op = N->getOperand(0);
10248 SDValue Mask = N->getOperand(1);
10249 SDValue EVL = N->getOperand(2);
10250 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10251 unsigned Sz = VT.getScalarSizeInBits();
10252
10253 SDValue Tmp, Tmp2, Tmp3;
10254
10255 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10256 // and finally the i1 pairs.
10257 // TODO: We can easily support i4/i2 legal types if any target ever does.
10258 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10259 // Create the masks - repeating the pattern every byte.
10260 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10261 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10262 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10263
10264 // BSWAP if the type is wider than a single byte.
10265 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10266
10267 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10268 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10269 Mask, EVL);
10270 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10271 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10272 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10273 Mask, EVL);
10274 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10275 Mask, EVL);
10276 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10277
10278 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10279 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10280 Mask, EVL);
10281 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10282 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10283 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10284 Mask, EVL);
10285 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10286 Mask, EVL);
10287 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10288
10289 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10290 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10291 Mask, EVL);
10292 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10293 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10294 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10295 Mask, EVL);
10296 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10297 Mask, EVL);
10298 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10299 return Tmp;
10300 }
10301 return SDValue();
10302}
10303
10304std::pair<SDValue, SDValue>
10306 SelectionDAG &DAG) const {
10307 SDLoc SL(LD);
10308 SDValue Chain = LD->getChain();
10309 SDValue BasePTR = LD->getBasePtr();
10310 EVT SrcVT = LD->getMemoryVT();
10311 EVT DstVT = LD->getValueType(0);
10312 ISD::LoadExtType ExtType = LD->getExtensionType();
10313
10314 if (SrcVT.isScalableVector())
10315 report_fatal_error("Cannot scalarize scalable vector loads");
10316
10317 unsigned NumElem = SrcVT.getVectorNumElements();
10318
10319 EVT SrcEltVT = SrcVT.getScalarType();
10320 EVT DstEltVT = DstVT.getScalarType();
10321
10322 // A vector must always be stored in memory as-is, i.e. without any padding
10323 // between the elements, since various code depend on it, e.g. in the
10324 // handling of a bitcast of a vector type to int, which may be done with a
10325 // vector store followed by an integer load. A vector that does not have
10326 // elements that are byte-sized must therefore be stored as an integer
10327 // built out of the extracted vector elements.
10328 if (!SrcEltVT.isByteSized()) {
10329 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10330 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10331
10332 unsigned NumSrcBits = SrcVT.getSizeInBits();
10333 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10334
10335 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10336 SDValue SrcEltBitMask = DAG.getConstant(
10337 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10338
10339 // Load the whole vector and avoid masking off the top bits as it makes
10340 // the codegen worse.
10341 SDValue Load =
10342 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10343 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10344 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10345
10346 SmallVector<SDValue, 8> Vals;
10347 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10348 unsigned ShiftIntoIdx =
10349 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10350 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10351 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10352 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10353 SDValue Elt =
10354 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10355 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10356
10357 if (ExtType != ISD::NON_EXTLOAD) {
10358 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10359 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10360 }
10361
10362 Vals.push_back(Scalar);
10363 }
10364
10365 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10366 return std::make_pair(Value, Load.getValue(1));
10367 }
10368
10369 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10370 assert(SrcEltVT.isByteSized());
10371
10372 SmallVector<SDValue, 8> Vals;
10373 SmallVector<SDValue, 8> LoadChains;
10374
10375 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10376 SDValue ScalarLoad = DAG.getExtLoad(
10377 ExtType, SL, DstEltVT, Chain, BasePTR,
10378 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10379 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10380
10381 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10382
10383 Vals.push_back(ScalarLoad.getValue(0));
10384 LoadChains.push_back(ScalarLoad.getValue(1));
10385 }
10386
10387 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10388 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10389
10390 return std::make_pair(Value, NewChain);
10391}
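// For example, a v4i2 vector is loaded as a single i8; on a little-endian
// target element Idx is then recovered as trunc((Load >> (2 * Idx)) & 0x3),
// matching the bit layout an i8-sized store of the same vector produces.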
10392
10394 SelectionDAG &DAG) const {
10395 SDLoc SL(ST);
10396
10397 SDValue Chain = ST->getChain();
10398 SDValue BasePtr = ST->getBasePtr();
10399 SDValue Value = ST->getValue();
10400 EVT StVT = ST->getMemoryVT();
10401
10402 if (StVT.isScalableVector())
10403 report_fatal_error("Cannot scalarize scalable vector stores");
10404
10405 // The type of the data we want to save
10406 EVT RegVT = Value.getValueType();
10407 EVT RegSclVT = RegVT.getScalarType();
10408
10409 // The type of data as saved in memory.
10410 EVT MemSclVT = StVT.getScalarType();
10411
10412 unsigned NumElem = StVT.getVectorNumElements();
10413
10414 // A vector must always be stored in memory as-is, i.e. without any padding
10415 // between the elements, since various code depends on it, e.g. in the
10416 // handling of a bitcast of a vector type to int, which may be done with a
10417 // vector store followed by an integer load. A vector that does not have
10418 // elements that are byte-sized must therefore be stored as an integer
10419 // built out of the extracted vector elements.
10420 if (!MemSclVT.isByteSized()) {
10421 unsigned NumBits = StVT.getSizeInBits();
10422 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10423
10424 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10425
10426 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10427 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10428 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10429 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10430 unsigned ShiftIntoIdx =
10431 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10432 SDValue ShiftAmount =
10433 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10434 SDValue ShiftedElt =
10435 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10436 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10437 }
10438
10439 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10440 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10441 ST->getAAInfo());
10442 }
10443
10444 // Store Stride in bytes
10445 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10446 assert(Stride && "Zero stride!");
10447 // Extract each of the elements from the original vector and save them into
10448 // memory individually.
10449 SmallVector<SDValue, 8> Stores;
10450 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10451 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10452
10453 SDValue Ptr =
10454 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10455
10456 // This scalar TruncStore may be illegal, but we legalize it later.
10457 SDValue Store = DAG.getTruncStore(
10458 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10459 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10460 ST->getAAInfo());
10461
10462 Stores.push_back(Store);
10463 }
10464
10465 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10466}
10467
10468std::pair<SDValue, SDValue>
10470 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10471 "unaligned indexed loads not implemented!");
10472 SDValue Chain = LD->getChain();
10473 SDValue Ptr = LD->getBasePtr();
10474 EVT VT = LD->getValueType(0);
10475 EVT LoadedVT = LD->getMemoryVT();
10476 SDLoc dl(LD);
10477 auto &MF = DAG.getMachineFunction();
10478
10479 if (VT.isFloatingPoint() || VT.isVector()) {
10480 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10481 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10482 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10483 LoadedVT.isVector()) {
10484 // Scalarize the load and let the individual components be handled.
10485 return scalarizeVectorLoad(LD, DAG);
10486 }
10487
10488 // Expand to a (misaligned) integer load of the same size,
10489 // then bitconvert to floating point or vector.
10490 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10491 LD->getMemOperand());
10492 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10493 if (LoadedVT != VT)
10494 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10495 ISD::ANY_EXTEND, dl, VT, Result);
10496
10497 return std::make_pair(Result, newLoad.getValue(1));
10498 }
10499
10500 // Copy the value to an (aligned) stack slot using (unaligned) integer
10501 // loads and stores, then do an (aligned) load from the stack slot.
10502 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10503 unsigned LoadedBytes = LoadedVT.getStoreSize();
10504 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10505 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10506
10507 // Make sure the stack slot is also aligned for the register type.
10508 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10509 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10510 SmallVector<SDValue, 8> Stores;
10511 SDValue StackPtr = StackBase;
10512 unsigned Offset = 0;
10513
10514 EVT PtrVT = Ptr.getValueType();
10515 EVT StackPtrVT = StackPtr.getValueType();
10516
10517 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10518 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10519
10520 // Do all but one copy using the full register width.
10521 for (unsigned i = 1; i < NumRegs; i++) {
10522 // Load one integer register's worth from the original location.
10523 SDValue Load = DAG.getLoad(
10524 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10525 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10526 // Follow the load with a store to the stack slot. Remember the store.
10527 Stores.push_back(DAG.getStore(
10528 Load.getValue(1), dl, Load, StackPtr,
10529 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10530 // Increment the pointers.
10531 Offset += RegBytes;
10532
10533 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10534 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10535 }
10536
10537 // The last copy may be partial. Do an extending load.
10538 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10539 8 * (LoadedBytes - Offset));
10540 SDValue Load = DAG.getExtLoad(
10541 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10542 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10543 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10544 // Follow the load with a store to the stack slot. Remember the store.
10545 // On big-endian machines this requires a truncating store to ensure
10546 // that the bits end up in the right place.
10547 Stores.push_back(DAG.getTruncStore(
10548 Load.getValue(1), dl, Load, StackPtr,
10549 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10550
10551 // The order of the stores doesn't matter - say it with a TokenFactor.
10552 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10553
10554 // Finally, perform the original load only redirected to the stack slot.
10555 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10556 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10557 LoadedVT);
10558
10559 // Callers expect a MERGE_VALUES node.
10560 return std::make_pair(Load, TF);
10561 }
10562
10563 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10564 "Unaligned load of unsupported type.");
10565
10566 // Compute the new VT that is half the size of the old one. This is an
10567 // integer MVT.
10568 unsigned NumBits = LoadedVT.getSizeInBits();
10569 EVT NewLoadedVT;
10570 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10571 NumBits >>= 1;
10572
10573 Align Alignment = LD->getBaseAlign();
10574 unsigned IncrementSize = NumBits / 8;
10575 ISD::LoadExtType HiExtType = LD->getExtensionType();
10576
10577 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10578 if (HiExtType == ISD::NON_EXTLOAD)
10579 HiExtType = ISD::ZEXTLOAD;
10580
10581 // Load the value in two parts
10582 SDValue Lo, Hi;
10583 if (DAG.getDataLayout().isLittleEndian()) {
10584 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10585 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10586 LD->getAAInfo());
10587
10588 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10589 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10590 LD->getPointerInfo().getWithOffset(IncrementSize),
10591 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10592 LD->getAAInfo());
10593 } else {
10594 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10595 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10596 LD->getAAInfo());
10597
10598 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10599 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10600 LD->getPointerInfo().getWithOffset(IncrementSize),
10601 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10602 LD->getAAInfo());
10603 }
10604
10605 // aggregate the two parts
10606 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10607 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10608 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10609
10610 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10611 Hi.getValue(1));
10612
10613 return std::make_pair(Result, TF);
10614}
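// E.g. an unaligned i32 load on a little-endian target becomes two i16
// loads: Lo zero-extended from [Ptr] and Hi from [Ptr + 2], recombined as
// (Hi << 16) | Lo, with the two load chains joined by the TokenFactor.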
10615
10617 SelectionDAG &DAG) const {
10618 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10619 "unaligned indexed stores not implemented!");
10620 SDValue Chain = ST->getChain();
10621 SDValue Ptr = ST->getBasePtr();
10622 SDValue Val = ST->getValue();
10623 EVT VT = Val.getValueType();
10624 Align Alignment = ST->getBaseAlign();
10625 auto &MF = DAG.getMachineFunction();
10626 EVT StoreMemVT = ST->getMemoryVT();
10627
10628 SDLoc dl(ST);
10629 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10630 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10631 if (isTypeLegal(intVT)) {
10632 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10633 StoreMemVT.isVector()) {
10634 // Scalarize the store and let the individual components be handled.
10635 SDValue Result = scalarizeVectorStore(ST, DAG);
10636 return Result;
10637 }
10638 // Expand to a bitconvert of the value to the integer type of the
10639 // same size, then a (misaligned) int store.
10640 // FIXME: Does not handle truncating floating point stores!
10641 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10642 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10643 Alignment, ST->getMemOperand()->getFlags());
10644 return Result;
10645 }
10646 // Do an (aligned) store to a stack slot, then copy from the stack slot
10647 // to the final destination using (unaligned) integer loads and stores.
10648 MVT RegVT = getRegisterType(
10649 *DAG.getContext(),
10650 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10651 EVT PtrVT = Ptr.getValueType();
10652 unsigned StoredBytes = StoreMemVT.getStoreSize();
10653 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10654 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10655
10656 // Make sure the stack slot is also aligned for the register type.
10657 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10658 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10659
10660 // Perform the original store, only redirected to the stack slot.
10661 SDValue Store = DAG.getTruncStore(
10662 Chain, dl, Val, StackPtr,
10663 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10664
10665 EVT StackPtrVT = StackPtr.getValueType();
10666
10667 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10668 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10669 SmallVector<SDValue, 8> Stores;
10670 unsigned Offset = 0;
10671
10672 // Do all but one copy using the full register width.
10673 for (unsigned i = 1; i < NumRegs; i++) {
10674 // Load one integer register's worth from the stack slot.
10675 SDValue Load = DAG.getLoad(
10676 RegVT, dl, Store, StackPtr,
10677 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10678 // Store it to the final location. Remember the store.
10679 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10680 ST->getPointerInfo().getWithOffset(Offset),
10681 ST->getBaseAlign(),
10682 ST->getMemOperand()->getFlags()));
10683 // Increment the pointers.
10684 Offset += RegBytes;
10685 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10686 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10687 }
10688
10689 // The last store may be partial. Do a truncating store. On big-endian
10690 // machines this requires an extending load from the stack slot to ensure
10691 // that the bits are in the right place.
10692 EVT LoadMemVT =
10693 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10694
10695 // Load from the stack slot.
10696 SDValue Load = DAG.getExtLoad(
10697 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10698 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10699
10700 Stores.push_back(DAG.getTruncStore(
10701 Load.getValue(1), dl, Load, Ptr,
10702 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10703 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10704 // The order of the stores doesn't matter - say it with a TokenFactor.
10705 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10706 return Result;
10707 }
10708
10709 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10710 "Unaligned store of unknown type.");
10711 // Get the half-size VT
10712 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10713 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10714 unsigned IncrementSize = NumBits / 8;
10715
10716 // Divide the stored value in two parts.
10717 SDValue ShiftAmount =
10718 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10719 SDValue Lo = Val;
10720 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10721 // fold and not use the upper bits. A smaller constant may be easier to
10722 // materialize.
10723 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10724 Lo = DAG.getNode(
10725 ISD::AND, dl, VT, Lo,
10726 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10727 VT));
10728 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10729
10730 // Store the two parts
10731 SDValue Store1, Store2;
10732 Store1 = DAG.getTruncStore(Chain, dl,
10733 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10734 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10735 ST->getMemOperand()->getFlags());
10736
10737 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10738 Store2 = DAG.getTruncStore(
10739 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10740 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10741 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10742
10743 SDValue Result =
10744 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10745 return Result;
10746}
10747
10748SDValue
10749TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
10750 const SDLoc &DL, EVT DataVT,
10751 SelectionDAG &DAG,
10752 bool IsCompressedMemory) const {
10753 SDValue Increment;
10754 EVT AddrVT = Addr.getValueType();
10755 EVT MaskVT = Mask.getValueType();
10756 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10757 "Incompatible types of Data and Mask");
10758 if (IsCompressedMemory) {
10759 // Incrementing the pointer according to number of '1's in the mask.
10760 if (DataVT.isScalableVector()) {
10761 EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
10762 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
10763 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
10764 } else {
10765 EVT MaskIntVT =
10766 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10767 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10768 if (MaskIntVT.getSizeInBits() < 32) {
10769 MaskInIntReg =
10770 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10771 MaskIntVT = MVT::i32;
10772 }
10773 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10774 }
10775 // Scale is an element size in bytes.
10776 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10777 AddrVT);
10778 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10779 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10780 } else
10781 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
10782
10783 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10784}
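// E.g. for a compressed store of v4i32 with Mask = <1,0,1,1> the increment
// is ctpop(mask) * 4 = 12 bytes, whereas the uncompressed case advances by
// the full store size (16 bytes) regardless of the mask.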
10785
10786static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
10787 EVT VecVT, const SDLoc &dl,
10788 ElementCount SubEC) {
10789 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10790 "Cannot index a scalable vector within a fixed-width vector");
10791
10792 unsigned NElts = VecVT.getVectorMinNumElements();
10793 unsigned NumSubElts = SubEC.getKnownMinValue();
10794 EVT IdxVT = Idx.getValueType();
10795
10796 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10797 // If this is a constant index and we know the value plus the number of the
10798 // elements in the subvector minus one is less than the minimum number of
10799 // elements then it's safe to return Idx.
10800 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10801 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10802 return Idx;
10803 SDValue VS =
10804 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10805 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10806 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10807 DAG.getConstant(NumSubElts, dl, IdxVT));
10808 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10809 }
10810 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10811 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10812 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10813 DAG.getConstant(Imm, dl, IdxVT));
10814 }
10815 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10816 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10817 DAG.getConstant(MaxIndex, dl, IdxVT));
10818}
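// E.g. when extracting one element of a fixed v4i32, NElts is a power of
// two, so the index is clamped cheaply as Idx & 3; for a 2-element subvector
// the UMIN path instead caps Idx at NElts - NumSubElts = 2.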
10819
10820SDValue
10821TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
10822 EVT VecVT, SDValue Index,
10823 const SDNodeFlags PtrArithFlags) const {
10824 return getVectorSubVecPointer(
10825 DAG, VecPtr, VecVT,
10826 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
10827 Index, PtrArithFlags);
10828}
10829
10830SDValue
10831TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr,
10832 EVT VecVT, EVT SubVecVT, SDValue Index,
10833 const SDNodeFlags PtrArithFlags) const {
10834 SDLoc dl(Index);
10835 // Make sure the index type is big enough to compute in.
10836 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10837
10838 EVT EltVT = VecVT.getVectorElementType();
10839
10840 // Calculate the element offset and add it to the pointer.
10841 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10842 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10843 "Converting bits to bytes lost precision");
10844 assert(SubVecVT.getVectorElementType() == EltVT &&
10845 "Sub-vector must be a vector with matching element type");
10846 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10847 SubVecVT.getVectorElementCount());
10848
10849 EVT IdxVT = Index.getValueType();
10850 if (SubVecVT.isScalableVector())
10851 Index =
10852 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10853 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10854
10855 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10856 DAG.getConstant(EltSize, dl, IdxVT));
10857 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
10858}
10859
10860//===----------------------------------------------------------------------===//
10861// Implementation of Emulated TLS Model
10862//===----------------------------------------------------------------------===//
10863
10864 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
10865 SelectionDAG &DAG) const {
10866 // Access to the address of TLS variable xyz is lowered to a function call:
10867 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10868 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10869 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10870 SDLoc dl(GA);
10871
10872 ArgListTy Args;
10873 const GlobalValue *GV =
10874 cast<GlobalAddressSDNode>(GA)->getGlobal();
10875 SmallString<32> NameString("__emutls_v.");
10876 NameString += GV->getName();
10877 StringRef EmuTlsVarName(NameString);
10878 const GlobalVariable *EmuTlsVar =
10879 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10880 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10881 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
10882
10883 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10884
10885 TargetLowering::CallLoweringInfo CLI(DAG);
10886 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10887 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10888 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10889
10890 // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
10891 // At least for X86 targets; maybe good for other targets too?
10892 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10893 MFI.setAdjustsStack(true); // Is this only for the X86 target?
10894 MFI.setHasCalls(true);
10895
10896 assert((GA->getOffset() == 0) &&
10897 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10898 return CallResult.first;
10899}
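
// At the C++ level the call emitted above behaves like this sketch, where
// __emutls_get_address is the runtime entry point named above; the wrapper
// name and the plain void* control-block type are illustrative
// simplifications, not the real runtime's declarations:
extern "C" void *__emutls_get_address(void *ControlBlock);
static int *exampleEmulatedTlsAddress(void *ControlForXyz) {
  // Equivalent of taking the address of a __thread variable "xyz" whose
  // control block the compiler emitted as "__emutls_v.xyz".
  return static_cast<int *>(__emutls_get_address(ControlForXyz));
}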
10900
10901 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
10902 SelectionDAG &DAG) const {
10903 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10904 if (!isCtlzFast())
10905 return SDValue();
10906 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10907 SDLoc dl(Op);
10908 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10909 EVT VT = Op.getOperand(0).getValueType();
10910 SDValue Zext = Op.getOperand(0);
10911 if (VT.bitsLT(MVT::i32)) {
10912 VT = MVT::i32;
10913 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10914 }
10915 unsigned Log2b = Log2_32(VT.getSizeInBits());
10916 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10917 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10918 DAG.getConstant(Log2b, dl, MVT::i32));
10919 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10920 }
10921 return SDValue();
10922}
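
// Why the SRL of CTLZ works (standalone sketch, assuming C++20; the function
// name is hypothetical): ctlz(x) equals the bit width only when x == 0, so
// shifting the count right by log2(width) leaves exactly the compare result.
#include <bit>
#include <cstdint>
static uint32_t exampleIsZeroViaCtlz(uint32_t X) {
  // std::countl_zero(0u) == 32, and 32 >> 5 == 1; any nonzero X gives a
  // count in [0, 31], which shifts down to 0.
  return uint32_t(std::countl_zero(X)) >> 5; // 5 == Log2_32(32)
}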
10923
10924 SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
10925 SDValue Op0 = Node->getOperand(0);
10926 SDValue Op1 = Node->getOperand(1);
10927 EVT VT = Op0.getValueType();
10928 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10929 unsigned Opcode = Node->getOpcode();
10930 SDLoc DL(Node);
10931
10932 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
10933 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
10934 if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
10935 DAG.SignBitIsZero(Op1))
10936 return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
10937
10938 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10939 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10940 getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10941 Op0 = DAG.getFreeze(Op0);
10942 SDValue Zero = DAG.getConstant(0, DL, VT);
10943 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10944 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10945 }
10946
10947 // umin(x,y) -> sub(x,usubsat(x,y))
10948 // TODO: Missing freeze(Op0)?
10949 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10950 isOperationLegal(ISD::USUBSAT, VT)) {
10951 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10952 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10953 }
10954
10955 // umax(x,y) -> add(x,usubsat(y,x))
10956 // TODO: Missing freeze(Op0)?
10957 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10958 isOperationLegal(ISD::USUBSAT, VT)) {
10959 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10960 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10961 }
10962
10963 // FIXME: Should really try to split the vector in case it's legal on a
10964 // subvector.
10965 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10966 return DAG.UnrollVectorOp(Node);
10967
10968 // Attempt to find an existing SETCC node that we can reuse.
10969 // TODO: Do we need a generic doesSETCCNodeExist?
10970 // TODO: Missing freeze(Op0)/freeze(Op1)?
10971 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10972 ISD::CondCode PrefCommuteCC,
10973 ISD::CondCode AltCommuteCC) {
10974 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10975 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10976 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10977 {Op0, Op1, DAG.getCondCode(CC)})) {
10978 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10979 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10980 }
10981 }
10982 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10983 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10984 {Op0, Op1, DAG.getCondCode(CC)})) {
10985 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10986 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10987 }
10988 }
10989 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10990 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10991 };
10992
10993 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10994 // -> Y = (A < B) ? B : A
10995 // -> Y = (A >= B) ? A : B
10996 // -> Y = (A <= B) ? B : A
10997 switch (Opcode) {
10998 case ISD::SMAX:
10999 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11000 case ISD::SMIN:
11001 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11002 case ISD::UMAX:
11003 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11004 case ISD::UMIN:
11005 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11006 }
11007
11008 llvm_unreachable("How did we get here?");
11009}
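
// The umax(x,1) fold above in scalar form (illustrative sketch, hypothetical
// name): with all-ones booleans, (x == 0) yields 0 or -1, and x - (-1) == x + 1.
#include <cstdint>
static uint32_t exampleUMaxWithOne(uint32_t X) {
  uint32_t AllOnesIfZero = (X == 0) ? ~0u : 0u; // SETCC, all-ones boolean style
  return X - AllOnesIfZero;                     // 0 -> 1, everything else -> X
}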
11010
11011 SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
11012 unsigned Opcode = Node->getOpcode();
11013 SDValue LHS = Node->getOperand(0);
11014 SDValue RHS = Node->getOperand(1);
11015 EVT VT = LHS.getValueType();
11016 SDLoc dl(Node);
11017
11018 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11019 assert(VT.isInteger() && "Expected operands to be integers");
11020
11021 // usub.sat(a, b) -> umax(a, b) - b
11022 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
11023 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
11024 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
11025 }
11026
11027 // usub.sat(a, 1) -> sub(a, zext(a != 0))
11028 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS)) {
11029 LHS = DAG.getFreeze(LHS);
11030 SDValue Zero = DAG.getConstant(0, dl, VT);
11031 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11032 SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
11033 SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
11034 Subtrahend =
11035 DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
11036 return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
11037 }
11038
11039 // uadd.sat(a, b) -> umin(a, ~b) + b
11040 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
11041 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
11042 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
11043 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
11044 }
11045
11046 unsigned OverflowOp;
11047 switch (Opcode) {
11048 case ISD::SADDSAT:
11049 OverflowOp = ISD::SADDO;
11050 break;
11051 case ISD::UADDSAT:
11052 OverflowOp = ISD::UADDO;
11053 break;
11054 case ISD::SSUBSAT:
11055 OverflowOp = ISD::SSUBO;
11056 break;
11057 case ISD::USUBSAT:
11058 OverflowOp = ISD::USUBO;
11059 break;
11060 default:
11061 llvm_unreachable("Expected method to receive signed or unsigned saturation "
11062 "addition or subtraction node.");
11063 }
11064
11065 // FIXME: Should really try to split the vector in case it's legal on a
11066 // subvector.
11067 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
11068 return DAG.UnrollVectorOp(Node);
11069
11070 unsigned BitWidth = LHS.getScalarValueSizeInBits();
11071 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11072 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11073 SDValue SumDiff = Result.getValue(0);
11074 SDValue Overflow = Result.getValue(1);
11075 SDValue Zero = DAG.getConstant(0, dl, VT);
11076 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
11077
11078 if (Opcode == ISD::UADDSAT) {
11079 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
11080 // (LHS + RHS) | OverflowMask
11081 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11082 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
11083 }
11084 // Overflow ? 0xffff.... : (LHS + RHS)
11085 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
11086 }
11087
11088 if (Opcode == ISD::USUBSAT) {
11089 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
11090 // (LHS - RHS) & ~OverflowMask
11091 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11092 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
11093 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
11094 }
11095 // Overflow ? 0 : (LHS - RHS)
11096 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
11097 }
11098
11099 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
11100 APInt MinVal = APInt::getSignedMinValue(BitWidth);
11101 APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
11102
11103 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
11104 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
11105
11106 // If either of the operand signs are known, then they are guaranteed to
11107 // only saturate in one direction. If non-negative they will saturate
11108 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11109 //
11110 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11111 // sign of 'y' has to be flipped.
11112
11113 bool LHSIsNonNegative = KnownLHS.isNonNegative();
11114 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
11115 : KnownRHS.isNegative();
11116 if (LHSIsNonNegative || RHSIsNonNegative) {
11117 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11118 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
11119 }
11120
11121 bool LHSIsNegative = KnownLHS.isNegative();
11122 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
11123 : KnownRHS.isNonNegative();
11124 if (LHSIsNegative || RHSIsNegative) {
11125 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11126 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
11127 }
11128 }
11129
11130 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
11131 APInt MinVal = APInt::getSignedMinValue(BitWidth);
11132 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11133 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
11134 DAG.getConstant(BitWidth - 1, dl, VT));
11135 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
11136 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
11137}
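
// The unsigned saturation paths above, written out for scalars (illustrative
// sketch with hypothetical names; assumes sign-extended overflow masks):
#include <cstdint>
static uint32_t exampleUAddSat(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;
  uint32_t OverflowMask = (Sum < A) ? ~0u : 0u; // carry out of the add
  return Sum | OverflowMask;                    // clamp to UINT32_MAX
}
static uint32_t exampleUSubSat(uint32_t A, uint32_t B) {
  uint32_t Diff = A - B;
  uint32_t OverflowMask = (A < B) ? ~0u : 0u;   // borrow out of the sub
  return Diff & ~OverflowMask;                  // clamp to 0
}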
11138
11139 SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
11140 unsigned Opcode = Node->getOpcode();
11141 SDValue LHS = Node->getOperand(0);
11142 SDValue RHS = Node->getOperand(1);
11143 EVT VT = LHS.getValueType();
11144 EVT ResVT = Node->getValueType(0);
11145 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11146 SDLoc dl(Node);
11147
11148 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11149 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11150 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
11151 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
11152
11153 // We can't perform arithmetic on i1 values. Extending them would
11154 // probably result in worse codegen, so let's just use two selects instead.
11155 // Some targets are also just better off using selects rather than subtraction
11156 // because one of the conditions can be merged with one of the selects.
11157 // And finally, if we don't know the contents of high bits of a boolean value
11158 // we can't perform any arithmetic either.
11159 if (shouldExpandCmpUsingSelects(VT) ||
11160 BoolVT.getScalarSizeInBits() == 1 ||
11161 getBooleanContents(BoolVT) != ZeroOrNegativeOneBooleanContent) {
11162 SDValue SelectZeroOrOne =
11163 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
11164 DAG.getConstant(0, dl, ResVT));
11165 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
11166 SelectZeroOrOne);
11167 }
11168
11169 if (getBooleanContents(BoolVT) == ZeroOrNegativeOneBooleanContent)
11170 std::swap(IsGT, IsLT);
11171 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
11172 ResVT);
11173}
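
// The subtraction tail above in scalar form (illustrative sketch): with 0/-1
// booleans, IsLT - IsGT directly yields the -1/0/+1 three-way result, which
// is why the operands are swapped before the SUB.
#include <cstdint>
static int32_t exampleThreeWayCmp(int32_t L, int32_t R) {
  int32_t IsLT = (L < R) ? -1 : 0; // all-ones boolean contents
  int32_t IsGT = (L > R) ? -1 : 0;
  return IsLT - IsGT; // -1 if L < R, 0 if equal, +1 if L > R
}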
11174
11175 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
11176 unsigned Opcode = Node->getOpcode();
11177 bool IsSigned = Opcode == ISD::SSHLSAT;
11178 SDValue LHS = Node->getOperand(0);
11179 SDValue RHS = Node->getOperand(1);
11180 EVT VT = LHS.getValueType();
11181 SDLoc dl(Node);
11182
11183 assert((Node->getOpcode() == ISD::SSHLSAT ||
11184 Node->getOpcode() == ISD::USHLSAT) &&
11185 "Expected a SHLSAT opcode");
11186 assert(VT.isInteger() && "Expected operands to be integers");
11187
11188 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
11189 return DAG.UnrollVectorOp(Node);
11190
11191 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11192
11193 unsigned BW = VT.getScalarSizeInBits();
11194 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11195 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
11196 SDValue Orig =
11197 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11198
11199 SDValue SatVal;
11200 if (IsSigned) {
11201 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11202 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11203 SDValue Cond =
11204 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11205 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11206 } else {
11207 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11208 }
11209 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11210 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11211}
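
// The overflow test above for scalars (illustrative sketch, hypothetical
// name, assumes S < 32): the shift saturates exactly when shifting back down
// does not restore the input.
#include <cstdint>
static uint32_t exampleUShlSat(uint32_t X, unsigned S) {
  uint32_t R = X << S;
  return ((R >> S) == X) ? R : ~0u; // saturate to all-ones on overflow
}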
11212
11213 void TargetLowering::forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl,
11214 bool Signed, SDValue &Lo, SDValue &Hi,
11215 SDValue LHS, SDValue RHS,
11216 SDValue HiLHS, SDValue HiRHS) const {
11217 EVT VT = LHS.getValueType();
11218 assert(RHS.getValueType() == VT && "Mismatching operand types");
11219
11220 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11221 assert((!Signed || !HiLHS) &&
11222 "Signed flag should only be set when HiLHS and HiRHS are null");
11223
11224 // We'll expand the multiplication by brute force because we have no other
11225 // options. This is a trivially-generalized version of the code from
11226 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11227 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11228 // sign bits while calculating the Hi half.
11229 unsigned Bits = VT.getSizeInBits();
11230 unsigned HalfBits = Bits / 2;
11231 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11232 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11233 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11234
11235 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11236 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11237
11238 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11239 // This is always an unsigned shift.
11240 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11241
11242 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11243 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11244 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11245
11246 SDValue U =
11247 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11248 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11249 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11250
11251 SDValue V =
11252 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11253 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11254
11255 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11256 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11257
11258 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11259 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11260
11261 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11262 // the products to Hi.
11263 if (HiLHS) {
11264 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11265 DAG.getNode(ISD::ADD, dl, VT,
11266 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11267 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11268 }
11269}
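
// The same half-width decomposition, instantiated for a concrete unsigned
// 32 x 32 -> 64 multiply using 16-bit halves (a standalone, checkable sketch
// of Knuth's Algorithm M; names are hypothetical):
#include <cstdint>
static void exampleMulU32Wide(uint32_t A, uint32_t B, uint32_t &Lo,
                              uint32_t &Hi) {
  const uint32_t Mask = 0xFFFFu;
  uint32_t LL = A & Mask, LH = A >> 16;  // halves of A
  uint32_t RL = B & Mask, RH = B >> 16;  // halves of B
  uint32_t T = LL * RL;
  uint32_t U = LH * RL + (T >> 16);      // cannot overflow 32 bits
  uint32_t V = LL * RH + (U & Mask);     // cannot overflow 32 bits
  Lo = (T & Mask) | (V << 16);
  Hi = LH * RH + (U >> 16) + (V >> 16);
}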
11270
11271 void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
11272 bool Signed, const SDValue LHS,
11273 const SDValue RHS, SDValue &Lo,
11274 SDValue &Hi) const {
11275 EVT VT = LHS.getValueType();
11276 assert(RHS.getValueType() == VT && "Mismatching operand types");
11277 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
11278 // We can fall back to a libcall with an illegal type for the MUL if we
11279 // have a libcall big enough.
11280 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11281 if (WideVT == MVT::i16)
11282 LC = RTLIB::MUL_I16;
11283 else if (WideVT == MVT::i32)
11284 LC = RTLIB::MUL_I32;
11285 else if (WideVT == MVT::i64)
11286 LC = RTLIB::MUL_I64;
11287 else if (WideVT == MVT::i128)
11288 LC = RTLIB::MUL_I128;
11289
11290 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
11291 if (LibcallImpl == RTLIB::Unsupported) {
11292 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11293 return;
11294 }
11295
11296 SDValue HiLHS, HiRHS;
11297 if (Signed) {
11298 // The high part is obtained by SRA'ing all but one of the bits of the
11299 // low part.
11300 unsigned LoSize = VT.getFixedSizeInBits();
11301 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11302 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11303 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11304 } else {
11305 HiLHS = DAG.getConstant(0, dl, VT);
11306 HiRHS = DAG.getConstant(0, dl, VT);
11307 }
11308
11309 // Attempt a libcall.
11310 SDValue Ret;
11311 TargetLowering::MakeLibCallOptions CallOptions;
11312 CallOptions.setIsSigned(Signed);
11313 CallOptions.setIsPostTypeLegalization(true);
11314 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
11315 // Halves of WideVT are packed into registers in different order
11316 // depending on platform endianness. This is usually handled by
11317 // the C calling convention, but we can't defer to it in
11318 // the legalizer.
11319 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11320 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11321 } else {
11322 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11323 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11324 }
11325 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
11326 "Ret value is a collection of constituent nodes holding result.");
11327 if (DAG.getDataLayout().isLittleEndian()) {
11328 // Same as above.
11329 Lo = Ret.getOperand(0);
11330 Hi = Ret.getOperand(1);
11331 } else {
11332 Lo = Ret.getOperand(1);
11333 Hi = Ret.getOperand(0);
11334 }
11335}
11336
11337 SDValue
11338 TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
11339 assert((Node->getOpcode() == ISD::SMULFIX ||
11340 Node->getOpcode() == ISD::UMULFIX ||
11341 Node->getOpcode() == ISD::SMULFIXSAT ||
11342 Node->getOpcode() == ISD::UMULFIXSAT) &&
11343 "Expected a fixed point multiplication opcode");
11344
11345 SDLoc dl(Node);
11346 SDValue LHS = Node->getOperand(0);
11347 SDValue RHS = Node->getOperand(1);
11348 EVT VT = LHS.getValueType();
11349 unsigned Scale = Node->getConstantOperandVal(2);
11350 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11351 Node->getOpcode() == ISD::UMULFIXSAT);
11352 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11353 Node->getOpcode() == ISD::SMULFIXSAT);
11354 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11355 unsigned VTSize = VT.getScalarSizeInBits();
11356
11357 if (!Scale) {
11358 // [us]mul.fix(a, b, 0) -> mul(a, b)
11359 if (!Saturating) {
11360 if (isOperationLegalOrCustom(ISD::MUL, VT))
11361 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11362 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11363 SDValue Result =
11364 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11365 SDValue Product = Result.getValue(0);
11366 SDValue Overflow = Result.getValue(1);
11367 SDValue Zero = DAG.getConstant(0, dl, VT);
11368
11369 APInt MinVal = APInt::getSignedMinValue(VTSize);
11370 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11371 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11372 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11373 // Xor the inputs; if the resulting sign bit is 0, the product will be
11374 // positive, else negative.
11375 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11376 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11377 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11378 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11379 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11380 SDValue Result =
11381 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11382 SDValue Product = Result.getValue(0);
11383 SDValue Overflow = Result.getValue(1);
11384
11385 APInt MaxVal = APInt::getMaxValue(VTSize);
11386 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11387 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11388 }
11389 }
11390
11391 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11392 "Expected scale to be less than the number of bits if signed or at "
11393 "most the number of bits if unsigned.");
11394 assert(LHS.getValueType() == RHS.getValueType() &&
11395 "Expected both operands to be the same type");
11396
11397 // Get the upper and lower bits of the result.
11398 SDValue Lo, Hi;
11399 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11400 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11401 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11402 if (VT.isVector())
11403 WideVT =
11404 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11405 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11406 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11407 Lo = Result.getValue(0);
11408 Hi = Result.getValue(1);
11409 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11410 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11411 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11412 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11413 // Try for a multiplication using a wider type.
11414 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11415 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11416 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11417 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11418 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11419 SDValue Shifted =
11420 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11421 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11422 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11423 } else if (VT.isVector()) {
11424 return SDValue();
11425 } else {
11426 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11427 }
11428
11429 if (Scale == VTSize)
11430 // Result is just the top half since we'd be shifting by the width of the
11431 // operand. Overflow impossible so this works for both UMULFIX and
11432 // UMULFIXSAT.
11433 return Hi;
11434
11435 // The result will need to be shifted right by the scale since both operands
11436 // are scaled. The result is given to us in 2 halves, so we only want part of
11437 // both in the result.
11438 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11439 DAG.getShiftAmountConstant(Scale, VT, dl));
11440 if (!Saturating)
11441 return Result;
11442
11443 if (!Signed) {
11444 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11445 // widened multiplication) aren't all zeroes.
11446
11447 // Saturate to max if ((Hi >> Scale) != 0),
11448 // which is the same as if (Hi > ((1 << Scale) - 1))
11449 APInt MaxVal = APInt::getMaxValue(VTSize);
11450 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11451 dl, VT);
11452 Result = DAG.getSelectCC(dl, Hi, LowMask,
11453 DAG.getConstant(MaxVal, dl, VT), Result,
11454 ISD::SETUGT);
11455
11456 return Result;
11457 }
11458
11459 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11460 // widened multiplication) aren't all ones or all zeroes.
11461
11462 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11463 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11464
11465 if (Scale == 0) {
11466 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11467 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11468 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11469 // Saturate to SatMin if the wide product is negative, and to SatMax if
11470 // the wide product is positive ...
11471 SDValue Zero = DAG.getConstant(0, dl, VT);
11472 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11473 ISD::SETLT);
11474 // ... but only if we overflowed.
11475 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11476 }
11477
11478 // We handled Scale==0 above, so all the bits to examine are in Hi.
11479
11480 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11481 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11482 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11483 dl, VT);
11484 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11485 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
11486 // which is the same as if (Hi < (-1 << (Scale - 1))).
11487 SDValue HighMask =
11488 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11489 dl, VT);
11490 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11491 return Result;
11492}
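
// For Scale == 16 on 32-bit unsigned operands, the FSHR above is just the
// middle of the 64-bit product (illustrative sketch, hypothetical name):
#include <cstdint>
static uint32_t exampleUMulFixQ16(uint32_t A, uint32_t B) {
  uint64_t Wide = uint64_t(A) * uint64_t(B); // {Hi, Lo}
  return uint32_t(Wide >> 16);               // FSHR(Hi, Lo, /*Scale=*/16)
}
// e.g. 1.5 * 2.25 in Q16: 0x18000 * 0x24000 -> 0x36000, i.e. 3.375.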
11493
11494 SDValue
11495 TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
11496 SDValue LHS, SDValue RHS,
11497 unsigned Scale, SelectionDAG &DAG) const {
11498 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11499 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11500 "Expected a fixed point division opcode");
11501
11502 EVT VT = LHS.getValueType();
11503 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11504 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11505 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11506
11507 // If there is enough room in the type to upscale the LHS or downscale the
11508 // RHS before the division, we can perform it in this type without having to
11509 // resize. For signed operations, the LHS headroom is the number of
11510 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11511 // The headroom for the RHS is the number of trailing zeroes.
11512 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11513 : DAG.computeKnownBits(LHS).countMinLeadingZeros();
11514 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11515
11516 // For signed saturating operations, we need to be able to detect true integer
11517 // division overflow; that is, when you have MIN / -EPS. However, this
11518 // is undefined behavior and if we emit divisions that could take such
11519 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11520 // example).
11521 // Avoid this by requiring an extra bit so that we never get this case.
11522 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11523 // signed saturating division, we need to emit a whopping 32-bit division.
11524 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11525 return SDValue();
11526
11527 unsigned LHSShift = std::min(LHSLead, Scale);
11528 unsigned RHSShift = Scale - LHSShift;
11529
11530 // At this point, we know that if we shift the LHS up by LHSShift and the
11531 // RHS down by RHSShift, we can emit a regular division with a final scaling
11532 // factor of Scale.
11533
11534 if (LHSShift)
11535 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11536 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11537 if (RHSShift)
11538 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11539 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11540
11541 SDValue Quot;
11542 if (Signed) {
11543 // For signed operations, if the resulting quotient is negative and the
11544 // remainder is nonzero, subtract 1 from the quotient to round towards
11545 // negative infinity.
11546 SDValue Rem;
11547 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11548 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11549 // we couldn't just form a libcall, but the type legalizer doesn't do it.
11550 if (isTypeLegal(VT) &&
11551 isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
11552 Quot = DAG.getNode(ISD::SDIVREM, dl,
11553 DAG.getVTList(VT, VT),
11554 LHS, RHS);
11555 Rem = Quot.getValue(1);
11556 Quot = Quot.getValue(0);
11557 } else {
11558 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11559 LHS, RHS);
11560 Rem = DAG.getNode(ISD::SREM, dl, VT,
11561 LHS, RHS);
11562 }
11563 SDValue Zero = DAG.getConstant(0, dl, VT);
11564 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11565 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11566 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11567 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11568 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11569 DAG.getConstant(1, dl, VT));
11570 Quot = DAG.getSelect(dl, VT,
11571 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11572 Sub1, Quot);
11573 } else
11574 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11575 LHS, RHS);
11576
11577 return Quot;
11578}
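
// The pre-shifting trick above for scalars (illustrative sketch, hypothetical
// name): with LHSShift + RHSShift == Scale and enough headroom, the scaled
// quotient (A / B) * 2^Scale can be computed in the original width.
#include <cstdint>
static uint32_t exampleUDivFix(uint32_t A, uint32_t B, unsigned Scale,
                               unsigned LHSShift) {
  // Assumes A has at least LHSShift leading zeros and B has at least
  // Scale - LHSShift trailing zeros, as the headroom check establishes.
  return (A << LHSShift) / (B >> (Scale - LHSShift));
}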
11579
11580 void TargetLowering::expandUADDSUBO(
11581 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11582 SDLoc dl(Node);
11583 SDValue LHS = Node->getOperand(0);
11584 SDValue RHS = Node->getOperand(1);
11585 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11586
11587 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11588 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11589 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11590 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11591 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11592 { LHS, RHS, CarryIn });
11593 Result = SDValue(NodeCarry.getNode(), 0);
11594 Overflow = SDValue(NodeCarry.getNode(), 1);
11595 return;
11596 }
11597
11598 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11599 LHS.getValueType(), LHS, RHS);
11600
11601 EVT ResultType = Node->getValueType(1);
11602 EVT SetCCType = getSetCCResultType(
11603 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11604 SDValue SetCC;
11605 if (IsAdd && isOneConstant(RHS)) {
11606 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
11607 // the live range of X. We assume comparing with 0 is cheap.
11608 // The general case (X + C) < C is not necessarily beneficial. Although we
11609 // reduce the live range of X, we may introduce the materialization of
11610 // constant C.
11611 SetCC =
11612 DAG.getSetCC(dl, SetCCType, Result,
11613 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11614 } else if (IsAdd && isAllOnesConstant(RHS)) {
11615 // Special case: uaddo X, -1 overflows if X != 0.
11616 SetCC =
11617 DAG.getSetCC(dl, SetCCType, LHS,
11618 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11619 } else {
11620 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11621 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11622 }
11623 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11624}
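
// The generic unsigned-overflow tests built above are the usual scalar
// idioms (illustrative sketch, hypothetical names):
#include <cstdint>
static bool exampleUAddOverflows(uint32_t A, uint32_t B) {
  return A + B < A; // SETULT(Result, LHS)
}
static bool exampleUSubOverflows(uint32_t A, uint32_t B) {
  return A - B > A; // SETUGT(Result, LHS)
}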
11625
11626 void TargetLowering::expandSADDSUBO(
11627 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11628 SDLoc dl(Node);
11629 SDValue LHS = Node->getOperand(0);
11630 SDValue RHS = Node->getOperand(1);
11631 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11632
11633 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11634 LHS.getValueType(), LHS, RHS);
11635
11636 EVT ResultType = Node->getValueType(1);
11637 EVT OType = getSetCCResultType(
11638 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11639
11640 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11641 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11642 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11643 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11644 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11645 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11646 return;
11647 }
11648
11649 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11650
11651 // For an addition, the result should be less than one of the operands (LHS)
11652 // if and only if the other operand (RHS) is negative, otherwise there will
11653 // be overflow.
11654 // For a subtraction, the result should be less than one of the operands
11655 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11656 // otherwise there will be overflow.
11657 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11658 SDValue ConditionRHS =
11659 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11660
11661 Overflow = DAG.getBoolExtOrTrunc(
11662 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11663 ResultType, ResultType);
11664}
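
// The XOR of the two conditions above, for a scalar signed add (illustrative
// sketch): overflow occurred iff "result < LHS" disagrees with "RHS < 0".
#include <cstdint>
static bool exampleSAddOverflows(int32_t A, int32_t B) {
  int32_t R = int32_t(uint32_t(A) + uint32_t(B)); // wrapping add
  return (R < A) != (B < 0);
}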
11665
11666 bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
11667 SDValue &Overflow, SelectionDAG &DAG) const {
11668 SDLoc dl(Node);
11669 EVT VT = Node->getValueType(0);
11670 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11671 SDValue LHS = Node->getOperand(0);
11672 SDValue RHS = Node->getOperand(1);
11673 bool isSigned = Node->getOpcode() == ISD::SMULO;
11674
11675 // For power-of-two multiplications we can use a simpler shift expansion.
11676 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11677 const APInt &C = RHSC->getAPIntValue();
11678 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11679 if (C.isPowerOf2()) {
11680 // smulo(x, signed_min) is same as umulo(x, signed_min).
11681 bool UseArithShift = isSigned && !C.isMinSignedValue();
11682 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11683 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11684 Overflow = DAG.getSetCC(dl, SetCCVT,
11685 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11686 dl, VT, Result, ShiftAmt),
11687 LHS, ISD::SETNE);
11688 return true;
11689 }
11690 }
11691
11692 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11693 if (VT.isVector())
11694 WideVT =
11695 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
11696
11697 SDValue BottomHalf;
11698 SDValue TopHalf;
11699 static const unsigned Ops[2][3] =
11700 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
11701 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
11702 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11703 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11704 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11705 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11706 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11707 RHS);
11708 TopHalf = BottomHalf.getValue(1);
11709 } else if (isTypeLegal(WideVT)) {
11710 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11711 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11712 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11713 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11714 SDValue ShiftAmt =
11715 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11716 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11717 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11718 } else {
11719 if (VT.isVector())
11720 return false;
11721
11722 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11723 }
11724
11725 Result = BottomHalf;
11726 if (isSigned) {
11727 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11728 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11729 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11730 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11731 } else {
11732 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11733 DAG.getConstant(0, dl, VT), ISD::SETNE);
11734 }
11735
11736 // Truncate the result if SetCC returns a larger type than needed.
11737 EVT RType = Node->getValueType(1);
11738 if (RType.bitsLT(Overflow.getValueType()))
11739 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11740
11741 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11742 "Unexpected result type for S/UMULO legalization");
11743 return true;
11744}
11745
11746 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
11747 SDLoc dl(Node);
11748 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11749 SDValue Op = Node->getOperand(0);
11750 EVT VT = Op.getValueType();
11751
11752 // Try to use a shuffle reduction for power of two vectors.
11753 if (VT.isPow2VectorType()) {
11754 while (VT.getVectorNumElements() > 1) {
11755 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11756 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11757 break;
11758
11759 SDValue Lo, Hi;
11760 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11761 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11762 VT = HalfVT;
11763
11764 // Stop if splitting is enough to make the reduction legal.
11765 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
11766 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
11767 Node->getFlags());
11768 }
11769 }
11770
11771 if (VT.isScalableVector())
11772 report_fatal_error(
11773 "Expanding reductions for scalable vectors is undefined.");
11774
11775 EVT EltVT = VT.getVectorElementType();
11776 unsigned NumElts = VT.getVectorNumElements();
11777
11778 SmallVector<SDValue, 8> Ops;
11779 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11780
11781 SDValue Res = Ops[0];
11782 for (unsigned i = 1; i < NumElts; i++)
11783 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11784
11785 // Result type may be wider than element type.
11786 if (EltVT != Node->getValueType(0))
11787 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11788 return Res;
11789}
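
// The halving loop above is a log2(N)-step tree reduction; scalar model for
// an 8-lane integer add (illustrative sketch, modifies V in place):
#include <cstdint>
static uint32_t exampleTreeReduceAdd(uint32_t V[8]) {
  for (unsigned Width = 8; Width > 1; Width /= 2)
    for (unsigned I = 0; I != Width / 2; ++I)
      V[I] += V[I + Width / 2]; // BaseOpcode applied to the Lo/Hi halves
  return V[0];
}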
11790
11791 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
11792 SDLoc dl(Node);
11793 SDValue AccOp = Node->getOperand(0);
11794 SDValue VecOp = Node->getOperand(1);
11795 SDNodeFlags Flags = Node->getFlags();
11796
11797 EVT VT = VecOp.getValueType();
11798 EVT EltVT = VT.getVectorElementType();
11799
11800 if (VT.isScalableVector())
11801 report_fatal_error(
11802 "Expanding reductions for scalable vectors is undefined.");
11803
11804 unsigned NumElts = VT.getVectorNumElements();
11805
11806 SmallVector<SDValue, 8> Ops;
11807 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11808
11809 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11810
11811 SDValue Res = AccOp;
11812 for (unsigned i = 0; i < NumElts; i++)
11813 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11814
11815 return Res;
11816}
11817
11818 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
11819 SelectionDAG &DAG) const {
11820 EVT VT = Node->getValueType(0);
11821 SDLoc dl(Node);
11822 bool isSigned = Node->getOpcode() == ISD::SREM;
11823 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11824 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11825 SDValue Dividend = Node->getOperand(0);
11826 SDValue Divisor = Node->getOperand(1);
11827 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
11828 SDVTList VTs = DAG.getVTList(VT, VT);
11829 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11830 return true;
11831 }
11832 if (isOperationLegalOrCustom(DivOpc, VT)) {
11833 // X % Y -> X-X/Y*Y
11834 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11835 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11836 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11837 return true;
11838 }
11839 return false;
11840}
11841
11842 SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
11843 SelectionDAG &DAG) const {
11844 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11845 SDLoc dl(SDValue(Node, 0));
11846 SDValue Src = Node->getOperand(0);
11847
11848 // DstVT is the result type, while SatVT is the size to which we saturate
11849 EVT SrcVT = Src.getValueType();
11850 EVT DstVT = Node->getValueType(0);
11851
11852 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11853 unsigned SatWidth = SatVT.getScalarSizeInBits();
11854 unsigned DstWidth = DstVT.getScalarSizeInBits();
11855 assert(SatWidth <= DstWidth &&
11856 "Expected saturation width smaller than result width");
11857
11858 // Determine minimum and maximum integer values and their corresponding
11859 // floating-point values.
11860 APInt MinInt, MaxInt;
11861 if (IsSigned) {
11862 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11863 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11864 } else {
11865 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11866 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11867 }
11868
11869 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11870 // libcall emission cannot handle this. Large result types will fail.
11871 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11872 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11873 SrcVT = Src.getValueType();
11874 }
11875
11876 const fltSemantics &Sem = SrcVT.getFltSemantics();
11877 APFloat MinFloat(Sem);
11878 APFloat MaxFloat(Sem);
11879
11880 APFloat::opStatus MinStatus =
11881 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11882 APFloat::opStatus MaxStatus =
11883 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11884 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11885 !(MaxStatus & APFloat::opStatus::opInexact);
11886
11887 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11888 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11889
11890 // If the integer bounds are exactly representable as floats and min/max are
11891 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11892 // of comparisons and selects.
11893 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11894 isOperationLegal(ISD::FMAXNUM, SrcVT);
11895 if (AreExactFloatBounds && MinMaxLegal) {
11896 SDValue Clamped = Src;
11897
11898 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11899 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11900 // Clamp by MaxFloat from above. NaN cannot occur.
11901 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11902 // Convert clamped value to integer.
11903 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11904 dl, DstVT, Clamped);
11905
11906 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11907 // which will cast to zero.
11908 if (!IsSigned)
11909 return FpToInt;
11910
11911 // Otherwise, select 0 if Src is NaN.
11912 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11913 EVT SetCCVT =
11914 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11915 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11916 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11917 }
11918
11919 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11920 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11921
11922 // Result of direct conversion. The assumption here is that the operation is
11923 // non-trapping and it's fine to apply it to an out-of-range value if we
11924 // select it away later.
11925 SDValue FpToInt =
11926 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11927
11928 SDValue Select = FpToInt;
11929
11930 EVT SetCCVT =
11931 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11932
11933 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11934 // MinInt if Src is NaN.
11935 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11936 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11937 // If Src OGT MaxFloat, select MaxInt.
11938 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11939 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11940
11941 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11942 // is already zero.
11943 if (!IsSigned)
11944 return Select;
11945
11946 // Otherwise, select 0 if Src is NaN.
11947 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11948 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11949 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11950}
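
// The comparison/select path above for f32 -> i32 (illustrative sketch;
// note INT32_MAX is not exactly representable in f32, so the greatest float
// not exceeding it serves as the upper bound):
#include <cmath>
#include <cstdint>
static int32_t exampleFpToSintSat32(float X) {
  if (std::isnan(X))
    return 0;                        // the final "select 0 if Src is NaN"
  const float MinF = -2147483648.0f; // INT32_MIN, exact in f32
  const float MaxF = 2147483520.0f;  // INT32_MAX rounded toward zero
  if (X < MinF)
    return INT32_MIN;
  if (X > MaxF)
    return INT32_MAX;
  return int32_t(X);                 // in range, so the cast is defined
}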
11951
11952 SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue Op,
11953 const SDLoc &dl,
11954 SelectionDAG &DAG) const {
11955 EVT OperandVT = Op.getValueType();
11956 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11957 return Op;
11958 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11959 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11960 // can induce double-rounding which may alter the results. We can
11961 // correct for this using a trick explained in: Boldo, Sylvie, and
11962 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11963 // World Congress. 2005.
11964 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
11965 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
11966
11967 // We can keep the narrow value as-is if narrowing was exact (no
11968 // rounding error), the wide value was NaN (the narrow value is also
11969 // NaN and should be preserved) or if we rounded to the odd value.
11970 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
11971 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11972 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11973 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11974 EVT ResultIntVTCCVT = getSetCCResultType(
11975 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11976 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11977 // The result is already odd so we don't need to do anything.
11978 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11979
11980 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11981 Op.getValueType());
11982 // We keep results which are exact, odd or NaN.
11983 SDValue KeepNarrow =
11984 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
11985 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11986 // We morally performed a round-down if AbsNarrow is smaller than
11987 // AbsWide.
11988 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11989 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
11990 SDValue NarrowIsRd =
11991 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11992 // If the narrow value is odd or exact, pick it.
11993 // Otherwise, narrow is even and corresponds to either the rounded-up
11994 // or rounded-down value. If narrow is the rounded-down value, we want
11995 // the rounded-up value as it will be odd.
11996 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11997 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11998 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
11999 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
12000}
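
// A scalar model of round-to-odd for double -> float (illustrative sketch,
// hypothetical name; the later float -> bfloat16 step can then round
// normally without double-rounding error):
#include <cmath>
#include <cstdint>
#include <cstring>
static float exampleRoundToOddF32(double Wide) {
  float Narrow = static_cast<float>(Wide); // rounds to nearest-even
  if (Wide != Wide || static_cast<double>(Narrow) == Wide)
    return Narrow;                         // NaN or exact: keep the narrow value
  uint32_t Bits;
  std::memcpy(&Bits, &Narrow, sizeof(Bits));
  if (Bits & 1)
    return Narrow;                         // already odd: keep
  // Inexact and even: step one ulp so the significand ends in a 1 bit. In
  // sign-magnitude float encoding, Bits + 1 moves away from zero and
  // Bits - 1 moves toward zero, mirroring the Adjust select above.
  bool RoundedDown = std::fabs(static_cast<double>(Narrow)) < std::fabs(Wide);
  Bits = RoundedDown ? Bits + 1 : Bits - 1;
  std::memcpy(&Narrow, &Bits, sizeof(Bits));
  return Narrow;
}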
12001
12002 SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const {
12003 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12004 SDValue Op = Node->getOperand(0);
12005 EVT VT = Node->getValueType(0);
12006 SDLoc dl(Node);
12007 if (VT.getScalarType() == MVT::bf16) {
12008 if (Node->getConstantOperandVal(1) == 1) {
12009 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
12010 }
12011 EVT OperandVT = Op.getValueType();
12012 SDValue IsNaN = DAG.getSetCC(
12013 dl,
12014 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
12015 Op, Op, ISD::SETUO);
12016
12017 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12018 // can induce double-rounding which may alter the results. We can
12019 // correct for this using a trick explained in: Boldo, Sylvie, and
12020 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12021 // World Congress. 2005.
12022 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
12023 EVT I32 = F32.changeTypeToInteger();
12024 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
12025 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12026
12027 // Conversions should set NaN's quiet bit. This also prevents NaNs from
12028 // turning into infinities.
12029 SDValue NaN =
12030 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
12031
12032 // Factor in the contribution of the low 16 bits.
12033 SDValue One = DAG.getConstant(1, dl, I32);
12034 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
12035 DAG.getShiftAmountConstant(16, I32, dl));
12036 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
12037 SDValue RoundingBias =
12038 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
12039 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
12040
12041 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12042 // 0x80000000.
12043 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
12044
12045 // Now that we have rounded, shift the bits into position.
12046 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
12047 DAG.getShiftAmountConstant(16, I32, dl));
12048 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12049 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
12050 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
12051 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
12052 }
12053 return SDValue();
12054}
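
// The 0x7fff + lsb bias above is the standard round-to-nearest-even for
// f32 -> bf16 via truncation of the high half; scalar sketch (hypothetical
// name):
#include <cstdint>
#include <cstring>
static uint16_t exampleF32ToBf16(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  if (F != F)
    return uint16_t((Bits >> 16) | 0x0040); // quiet the NaN, as above
  uint32_t Lsb = (Bits >> 16) & 1;          // ties round to the even result
  return uint16_t((Bits + 0x7fff + Lsb) >> 16);
}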
12055
12056 SDValue TargetLowering::expandVectorSplice(SDNode *Node,
12057 SelectionDAG &DAG) const {
12058 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
12059 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12060 "Unexpected opcode!");
12061 assert((Node->getValueType(0).isScalableVector() ||
12062 !isa<ConstantSDNode>(Node->getOperand(2))) &&
12063 "Fixed length vector types with constant offsets expected to use "
12064 "SHUFFLE_VECTOR!");
12065
12066 EVT VT = Node->getValueType(0);
12067 SDValue V1 = Node->getOperand(0);
12068 SDValue V2 = Node->getOperand(1);
12069 SDValue Offset = Node->getOperand(2);
12070 SDLoc DL(Node);
12071
12072 // Expand through memory thusly:
12073 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12074 // Store V1, Ptr
12075 // Store V2, Ptr + sizeof(V1)
12076 // if (VECTOR_SPLICE_LEFT)
12077 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
12078 // else
12079 // Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
12080 // Res = Load Ptr
12081
12082 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
12083
12084 EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
12085 VT.getVectorElementCount() * 2);
12086 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12087 EVT PtrVT = StackPtr.getValueType();
12088 auto &MF = DAG.getMachineFunction();
12089 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12090 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12091
12092 // Store the lo part of CONCAT_VECTORS(V1, V2)
12093 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
12094 // Store the hi part of CONCAT_VECTORS(V1, V2)
12095 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
12096 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
12097 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
12098
12099 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
12100 SDValue EltByteSize =
12101 DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
12102 Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
12103 SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);
12104
12105 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
12106
12107 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
12108 StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
12109 else
12110 StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
12111
12112 // Load the spliced result
12113 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
12114 MachinePointerInfo::getUnknownStack(MF));
12115 }
12116
12117 SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,
12118 SelectionDAG &DAG) const {
12119 SDLoc DL(Node);
12120 SDValue Vec = Node->getOperand(0);
12121 SDValue Mask = Node->getOperand(1);
12122 SDValue Passthru = Node->getOperand(2);
12123
12124 EVT VecVT = Vec.getValueType();
12125 EVT ScalarVT = VecVT.getScalarType();
12126 EVT MaskVT = Mask.getValueType();
12127 EVT MaskScalarVT = MaskVT.getScalarType();
12128
12129 // Needs to be handled by targets that have scalable vector types.
12130 if (VecVT.isScalableVector())
12131 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
12132
12133 SDValue StackPtr = DAG.CreateStackTemporary(
12134 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
12135 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12136 MachinePointerInfo PtrInfo =
12137 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
12138
12139 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
12140 SDValue Chain = DAG.getEntryNode();
12141 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
12142
12143 bool HasPassthru = !Passthru.isUndef();
12144
12145 // If we have a passthru vector, store it on the stack, overwrite the matching
12146 // positions and then re-write the last element that was potentially
12147 // overwritten even though mask[i] = false.
12148 if (HasPassthru)
12149 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
12150
12151 SDValue LastWriteVal;
12152 APInt PassthruSplatVal;
12153 bool IsSplatPassthru =
12154 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
12155
12156 if (IsSplatPassthru) {
12157 // As we do not know which position we wrote to last, we cannot simply
12158 // access that index from the passthru vector. So we first check if passthru
12159 // is a splat vector, to use any element ...
12160 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
12161 } else if (HasPassthru) {
12162 // ... if it is not a splat vector, we need to get the passthru value at
12163 // position = popcount(mask) and re-load it from the stack before it is
12164 // overwritten in the loop below.
12165 EVT PopcountVT = ScalarVT.changeTypeToInteger();
12166 SDValue Popcount = DAG.getNode(
12167 ISD::TRUNCATE, DL,
12168 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
12169 Popcount = DAG.getNode(
12170 ISD::ZERO_EXTEND, DL,
12171 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
12172 Popcount);
12173 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
12174 SDValue LastElmtPtr =
12175 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
12176 LastWriteVal = DAG.getLoad(
12177 ScalarVT, DL, Chain, LastElmtPtr,
12178 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12179 Chain = LastWriteVal.getValue(1);
12180 }
12181
12182 unsigned NumElms = VecVT.getVectorNumElements();
12183 for (unsigned I = 0; I < NumElms; I++) {
12184 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12185 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12186 Chain = DAG.getStore(
12187 Chain, DL, ValI, OutPtr,
12188 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
12189
12190 // Get the mask value and add it to the current output position. This
12191 // either increments by 1 if MaskI is true or adds 0 otherwise.
12192 // Freeze in case we have poison/undef mask entries.
12193 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12194 MaskI = DAG.getFreeze(MaskI);
12195 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12196 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12197 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12198
12199 if (HasPassthru && I == NumElms - 1) {
12200 SDValue EndOfVector =
12201 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12202 SDValue AllLanesSelected =
12203 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
12204 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12205 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12206
12207 // Re-write the last ValI if all lanes were selected. Otherwise,
12208 // overwrite the last written position with the passthru value.
12209 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12210 LastWriteVal, SDNodeFlags::Unpredictable);
12211 Chain = DAG.getStore(
12212 Chain, DL, LastWriteVal, OutPtr,
12214 }
12215 }
12216
12217 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12218}
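
// The stack expansion above, as a scalar loop (illustrative sketch): every
// element is stored, but the output position only advances on active lanes.
#include <cstdint>
static void exampleCompress8(const uint32_t Vec[8], const bool Mask[8],
                             uint32_t Out[8] /* pre-filled with passthru */) {
  unsigned OutPos = 0;
  for (unsigned I = 0; I != 8; ++I) {
    Out[OutPos] = Vec[I];      // unconditional store, like the loop above
    OutPos += Mask[I] ? 1 : 0; // zero-extended mask bit
  }
  // The passthru fix-up for the final, possibly clobbered slot is omitted.
}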
12219
12220 SDValue TargetLowering::expandPartialReduceMLA(SDNode *N,
12221 SelectionDAG &DAG) const {
12222 SDLoc DL(N);
12223 SDValue Acc = N->getOperand(0);
12224 SDValue MulLHS = N->getOperand(1);
12225 SDValue MulRHS = N->getOperand(2);
12226 EVT AccVT = Acc.getValueType();
12227 EVT MulOpVT = MulLHS.getValueType();
12228
12229 EVT ExtMulOpVT =
12230 EVT::getVectorVT(*DAG.getContext(), AccVT.getVectorElementType(),
12231 MulOpVT.getVectorElementCount());
12232
12233 unsigned ExtOpcLHS, ExtOpcRHS;
12234 switch (N->getOpcode()) {
12235 default:
12236 llvm_unreachable("Unexpected opcode");
12237 case ISD::PARTIAL_REDUCE_UMLA:
12238 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
12239 break;
12240 case ISD::PARTIAL_REDUCE_SMLA:
12241 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
12242 break;
12243 case ISD::PARTIAL_REDUCE_FMLA:
12244 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
12245 break;
12246 }
12247
12248 if (ExtMulOpVT != MulOpVT) {
12249 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12250 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12251 }
12252 SDValue Input = MulLHS;
12253 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
12254 if (!llvm::isOneOrOneSplatFP(MulRHS))
12255 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12256 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
12257 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12258 }
12259
12260 unsigned Stride = AccVT.getVectorMinNumElements();
12261 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
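// E.g. (illustrative): accumulating a <4 x i32> partial reduction over
// <16 x i8> multiplicands gives Stride = 4 and ScaleFactor = 4; the
// (extended) 16-element product is carved into four 4-element subvectors
// below, which are then summed together with Acc.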
12262
12263 // Collect all of the subvectors
12264 std::deque<SDValue> Subvectors = {Acc};
12265 for (unsigned I = 0; I < ScaleFactor; I++)
12266 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12267
12268 unsigned FlatNode =
12269 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
12270
12271 // Flatten the subvector tree
12272 while (Subvectors.size() > 1) {
12273 Subvectors.push_back(
12274 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12275 Subvectors.pop_front();
12276 Subvectors.pop_front();
12277 }
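// E.g. (illustrative): starting from {Acc, S0, S1, S2, S3}, the queue
// becomes {S1, S2, S3, Acc+S0}, then {S3, Acc+S0, S1+S2}, and so on,
// pairing off the two front entries until a single sum remains.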
12278
12279 assert(Subvectors.size() == 1 &&
12280 "There should only be one subvector after tree flattening");
12281
12282 return Subvectors[0];
12283}
12284
12285/// Given a store node \p StoreNode, return true if it is safe to fold that node
12286/// into \p FPNode, which expands to a library call with output pointers.
12287 static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode,
12288 SDNode *FPNode) {
12289 SmallVector<const SDNode *, 8> Worklist;
12290 SmallVector<const SDNode *, 8> DeferredNodes;
12291 SmallPtrSet<const SDNode *, 16> Visited;
12292
12293 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
12294 for (SDValue Op : StoreNode->ops())
12295 if (Op.getNode() != FPNode)
12296 Worklist.push_back(Op.getNode());
12297
12298 unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
12299 while (!Worklist.empty()) {
12300 const SDNode *Node = Worklist.pop_back_val();
12301 auto [_, Inserted] = Visited.insert(Node);
12302 if (!Inserted)
12303 continue;
12304
12305 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
12306 return false;
12307
12308 // Reached the FPNode (folding would result in a cycle), or reached a
12309 // CALLSEQ_START (folding would result in nested call sequences).
12310 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
12311 return false;
12312
12313 if (Node->getOpcode() == ISD::CALLSEQ_END) {
12314 // Defer looking into call sequences (so we can check we're outside one).
12315 // We still need to look through these for the predecessor check.
12316 DeferredNodes.push_back(Node);
12317 continue;
12318 }
12319
12320 for (SDValue Op : Node->ops())
12321 Worklist.push_back(Op.getNode());
12322 }
12323
12324 // True if we're outside a call sequence and don't have the FPNode as a
12325 // predecessor. No cycles or nested call sequences possible.
12326 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
12327 MaxSteps);
12328}
12329
12330 bool TargetLowering::expandMultipleResultFPLibCall(
12331 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
12332 SmallVectorImpl<SDValue> &Results,
12333 std::optional<unsigned> CallRetResNo) const {
12334 if (LC == RTLIB::UNKNOWN_LIBCALL)
12335 return false;
12336
12337 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12338 if (LibcallImpl == RTLIB::Unsupported)
12339 return false;
12340
12341 LLVMContext &Ctx = *DAG.getContext();
12342 EVT VT = Node->getValueType(0);
12343 unsigned NumResults = Node->getNumValues();
12344
12345 // Find users of the node that store the results (and share input chains); the
12346 // destination pointers can then be used instead of creating stack allocations.
12347 SDValue StoresInChain;
12348 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
12349 for (SDNode *User : Node->users()) {
12350 if (!ISD::isNormalStore(User))
12351 continue;
12352 auto *ST = cast<StoreSDNode>(User);
12353 SDValue StoreValue = ST->getValue();
12354 unsigned ResNo = StoreValue.getResNo();
12355 // Ensure the store corresponds to an output pointer.
12356 if (CallRetResNo == ResNo)
12357 continue;
12358 // Ensure the store is to the default address space and is not atomic or volatile.
12359 if (!ST->isSimple() || ST->getAddressSpace() != 0)
12360 continue;
12361 // Ensure all store chains are the same (so they don't alias).
12362 if (StoresInChain && ST->getChain() != StoresInChain)
12363 continue;
12364 // Ensure the store is properly aligned.
12365 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
12366 if (ST->getAlign() <
12367 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
12368 continue;
12369 // Avoid:
12370 // 1. Creating cyclic dependencies.
12371 // 2. Expanding the node to a call within a call sequence.
12372 if (!canFoldStoreIntoLibCallOutputPointers(ST, Node))
12373 continue;
12374 ResultStores[ResNo] = ST;
12375 StoresInChain = ST->getChain();
12376 }
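// E.g. (illustrative, function names assumed): expanding an FSINCOS node
// into a sincosf(x, &sin_out, &cos_out)-style call lets stores of the two
// results that passed the checks above donate their destination pointers
// as the output arguments; any result without such a store is given a
// stack temporary below instead.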
12377
12378 ArgListTy Args;
12379
12380 // Pass the arguments.
12381 for (const SDValue &Op : Node->op_values()) {
12382 EVT ArgVT = Op.getValueType();
12383 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
12384 Args.emplace_back(Op, ArgTy);
12385 }
12386
12387 // Pass the output pointers.
12388 SmallVector<SDValue, 2> ResultPtrs(NumResults);
12389 Type *PointerTy = PointerType::getUnqual(Ctx);
12390 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
12391 if (ResNo == CallRetResNo)
12392 continue;
12393 EVT ResVT = Node->getValueType(ResNo);
12394 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
12395 ResultPtrs[ResNo] = ResultPtr;
12396 Args.emplace_back(ResultPtr, PointerTy);
12397 }
12398
12399 SDLoc DL(Node);
12400
12401 if (RTLIB::RuntimeLibcallsInfo::hasVectorMaskArgument(LibcallImpl)) {
12402 // Pass the vector mask (if required).
12403 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
12404 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
12405 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
12406 }
12407
12408 Type *RetType = CallRetResNo.has_value()
12409 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
12410 : Type::getVoidTy(Ctx);
12411 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
12412 SDValue Callee =
12413 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
12414 TargetLowering::CallLoweringInfo CLI(DAG);
12415 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
12416 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
12417
12418 auto [Call, CallChain] = LowerCallTo(CLI);
12419
12420 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
12421 if (ResNo == CallRetResNo) {
12422 Results.push_back(Call);
12423 continue;
12424 }
12425 MachinePointerInfo PtrInfo;
12426 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
12427 ResultPtr, PtrInfo);
12428 SDValue OutChain = LoadResult.getValue(1);
12429
12430 if (StoreSDNode *ST = ResultStores[ResNo]) {
12431 // Replace store with the library call.
12432 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
12433 PtrInfo = ST->getPointerInfo();
12434 } else {
12435 PtrInfo = MachinePointerInfo::getFixedStack(
12436 DAG.getMachineFunction(),
12437 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
12438 }
12439
12440 Results.push_back(LoadResult);
12441 }
12442
12443 return true;
12444}
12445
12446 bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
12447 SDValue &LHS, SDValue &RHS,
12448 SDValue &CC, SDValue Mask,
12449 SDValue EVL, bool &NeedInvert,
12450 const SDLoc &dl, SDValue &Chain,
12451 bool IsSignaling) const {
12452 MVT OpVT = LHS.getSimpleValueType();
12453 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12454 NeedInvert = false;
12455 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12456 bool IsNonVP = !EVL;
12457 switch (getCondCodeAction(CCCode, OpVT)) {
12458 default:
12459 llvm_unreachable("Unknown condition code action!");
12460 case TargetLowering::Legal:
12461 // Nothing to do.
12462 break;
12463 case TargetLowering::Expand: {
12464 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
12465 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12466 std::swap(LHS, RHS);
12467 CC = DAG.getCondCode(InvCC);
12468 return true;
12469 }
12470 // Swapping operands didn't work. Try inverting the condition.
12471 bool NeedSwap = false;
12472 InvCC = getSetCCInverse(CCCode, OpVT);
12473 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12474 // If inverting the condition is not enough, try swapping operands
12475 // on top of it.
12476 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12477 NeedSwap = true;
12478 }
12479 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12480 CC = DAG.getCondCode(InvCC);
12481 NeedInvert = true;
12482 if (NeedSwap)
12483 std::swap(LHS, RHS);
12484 return true;
12485 }
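// E.g. (illustrative): if neither SETULT nor its operand-swapped form
// SETUGT is legal but the inverse SETUGE is, the compare is emitted as
// (LHS SETUGE RHS) with NeedInvert set, and the caller logically negates
// the result.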
12486
12487 // Special case: expand i1 comparisons using logical operations.
12488 if (OpVT == MVT::i1) {
12489 SDValue Ret;
12490 switch (CCCode) {
12491 default:
12492 llvm_unreachable("Unknown integer setcc!");
12493 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12494 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12495 MVT::i1);
12496 break;
12497 case ISD::SETNE: // X != Y --> (X ^ Y)
12498 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12499 break;
12500 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12501 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12502 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12503 DAG.getNOT(dl, LHS, MVT::i1));
12504 break;
12505 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12506 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12507 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12508 DAG.getNOT(dl, RHS, MVT::i1));
12509 break;
12510 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12511 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12512 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12513 DAG.getNOT(dl, LHS, MVT::i1));
12514 break;
12515 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12516 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12517 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12518 DAG.getNOT(dl, RHS, MVT::i1));
12519 break;
12520 }
12521
12522 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12523 RHS = SDValue();
12524 CC = SDValue();
12525 return true;
12526 }
12527
12528 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
12529 unsigned Opc = 0;
12530 switch (CCCode) {
12531 default:
12532 llvm_unreachable("Don't know how to expand this condition!");
12533 case ISD::SETUO:
12534 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
12535 CC1 = ISD::SETUNE;
12536 CC2 = ISD::SETUNE;
12537 Opc = ISD::OR;
12538 break;
12539 }
12541 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12542 NeedInvert = true;
12543 [[fallthrough]];
12544 case ISD::SETO:
12546 "If SETO is expanded, SETOEQ must be legal!");
12547 CC1 = ISD::SETOEQ;
12548 CC2 = ISD::SETOEQ;
12549 Opc = ISD::AND;
12550 break;
12551 case ISD::SETONE:
12552 case ISD::SETUEQ:
12553 // If the SETUO or SETO CC isn't legal, we might be able to use
12554 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12555 // of SETOGT/SETOLT to be legal; the other can be emulated by swapping
12556 // the operands.
12557 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12558 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12559 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12560 CC1 = ISD::SETOGT;
12561 CC2 = ISD::SETOLT;
12562 Opc = ISD::OR;
12563 NeedInvert = ((unsigned)CCCode & 0x8U);
12564 break;
12565 }
12566 [[fallthrough]];
12567 case ISD::SETOEQ:
12568 case ISD::SETOGT:
12569 case ISD::SETOGE:
12570 case ISD::SETOLT:
12571 case ISD::SETOLE:
12572 case ISD::SETUNE:
12573 case ISD::SETUGT:
12574 case ISD::SETUGE:
12575 case ISD::SETULT:
12576 case ISD::SETULE:
12577 // If we are floating point, assign and break, otherwise fall through.
12578 if (!OpVT.isInteger()) {
12579 // We can use the 4th bit to tell if we are the unordered
12580 // or ordered version of the opcode.
12581 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12582 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12583 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12584 break;
12585 }
12586 // Fall through if we are an unsigned integer comparison.
12587 [[fallthrough]];
12588 case ISD::SETLE:
12589 case ISD::SETGT:
12590 case ISD::SETGE:
12591 case ISD::SETLT:
12592 case ISD::SETNE:
12593 case ISD::SETEQ:
12594 // If all combinations of inverting the condition and swapping operands
12595 // didn't work then we have no means to expand the condition.
12596 llvm_unreachable("Don't know how to expand this condition!");
12597 }
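// E.g. (illustrative): for floating-point SETUGT the logic above picks
// CC1 = SETGT, CC2 = SETUO and Opc = OR, so the compare expands to
// (LHS SETGT RHS) | (LHS SETUO RHS); SETOGT instead pairs SETGT with
// SETO under AND.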
12598
12599 SDValue SetCC1, SetCC2;
12600 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12601 // If we aren't the ordered or unordered operation,
12602 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12603 if (IsNonVP) {
12604 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12605 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12606 } else {
12607 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12608 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12609 }
12610 } else {
12611 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12612 if (IsNonVP) {
12613 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12614 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12615 } else {
12616 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12617 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12618 }
12619 }
12620 if (Chain)
12621 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12622 SetCC2.getValue(1));
12623 if (IsNonVP)
12624 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12625 else {
12626 // Transform the binary opcode to the VP equivalent.
12627 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12628 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12629 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12630 }
12631 RHS = SDValue();
12632 CC = SDValue();
12633 return true;
12634 }
12635 }
12636 return false;
12637}
12638
12639 SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,
12640 SelectionDAG &DAG) const {
12641 EVT VT = Node->getValueType(0);
12642 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
12643 // split into two equal parts.
12644 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
12645 return SDValue();
12646
12647 // Restrict expansion to cases where both parts can be concatenated.
12648 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
12649 if (LoVT != HiVT || !isTypeLegal(LoVT))
12650 return SDValue();
12651
12652 SDLoc DL(Node);
12653 unsigned Opcode = Node->getOpcode();
12654
12655 // Don't expand if the result is likely to be unrolled anyway.
12656 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
12657 return SDValue();
12658
12659 SmallVector<SDValue, 4> LoOps, HiOps;
12660 for (const SDValue &V : Node->op_values()) {
12661 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
12662 LoOps.push_back(Lo);
12663 HiOps.push_back(Hi);
12664 }
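// E.g. (illustrative): an operation on <8 x i16> that is only supported
// on <4 x i16> is emitted as two <4 x i16> operations whose results are
// concatenated back together below.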
12665
12666 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
12667 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
12668 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
12669}
12670
12671 SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
12672 const SDLoc &DL,
12673 EVT InVecVT, SDValue EltNo,
12674 LoadSDNode *OriginalLoad,
12675 SelectionDAG &DAG) const {
12676 assert(OriginalLoad->isSimple());
12677
12678 EVT VecEltVT = InVecVT.getVectorElementType();
12679
12680 // If the vector element type is not a multiple of a byte then we are unable
12681 // to correctly compute an address to load only the extracted element as a
12682 // scalar.
12683 if (!VecEltVT.isByteSized())
12684 return SDValue();
12685
12686 ISD::LoadExtType ExtTy =
12687 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
12688 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12689 return SDValue();
12690
12691 std::optional<unsigned> ByteOffset;
12692 Align Alignment = OriginalLoad->getAlign();
12693 MachinePointerInfo MPI;
12694 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12695 int Elt = ConstEltNo->getZExtValue();
12696 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
12697 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
12698 Alignment = commonAlignment(Alignment, *ByteOffset);
12699 } else {
12700 // Discard the pointer info, except for the address space, because the memory
12701 // operand can't represent this new access when the offset is variable.
12702 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
12703 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
12704 }
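// E.g. (illustrative): extracting element 2 from a loaded <4 x i32>
// vector gives ByteOffset = 32 * 2 / 8 = 8, so the scalar load built
// below reads 4 bytes at BasePtr + 8 with alignment
// commonAlignment(OriginalLoad->getAlign(), 8).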
12705
12706 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
12707 return SDValue();
12708
12709 unsigned IsFast = 0;
12710 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
12711 OriginalLoad->getAddressSpace(), Alignment,
12712 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
12713 !IsFast)
12714 return SDValue();
12715
12716 // The original DAG loaded the entire vector from memory, so arithmetic
12717 // within it must be inbounds.
12718 SDValue NewPtr = getInboundsVectorElementPointer(
12719 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
12720
12721 // We are replacing a vector load with a scalar load. The new load must have
12722 // identical memory op ordering to the original.
12723 SDValue Load;
12724 if (ResultVT.bitsGT(VecEltVT)) {
12725 // If the result type of vextract is wider than the load, then issue an
12726 // extending load instead.
12727 ISD::LoadExtType ExtType = isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT)
12728 ? ISD::ZEXTLOAD
12729 : ISD::EXTLOAD;
12730 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
12731 NewPtr, MPI, VecEltVT, Alignment,
12732 OriginalLoad->getMemOperand()->getFlags(),
12733 OriginalLoad->getAAInfo());
12734 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12735 } else {
12736 // The result type is narrower than or the same width as the vector element.
12737 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
12738 Alignment, OriginalLoad->getMemOperand()->getFlags(),
12739 OriginalLoad->getAAInfo());
12740 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
12741 if (ResultVT.bitsLT(VecEltVT))
12742 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
12743 else
12744 Load = DAG.getBitcast(ResultVT, Load);
12745 }
12746
12747 return Load;
12748}
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
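As a scalar model of what this saturating-add expansion computes (a sketch, not the DAG code itself; sadd_sat_i32 is an illustrative name, and __builtin_add_overflow is the GCC/Clang overflow intrinsic):

    #include <cstdint>
    // Illustrative scalar model of ISD::SADDSAT on i32.
    int32_t sadd_sat_i32(int32_t A, int32_t B) {
      int32_t Sum;
      if (!__builtin_add_overflow(A, B, &Sum))
        return Sum;                            // no overflow: plain add
      return A < 0 ? INT32_MIN : INT32_MAX;    // clamp toward the operands' sign
    }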
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren, 1...
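The arithmetic behind this (Hacker's Delight, ch. 10): a signed divide by 2^K truncates toward zero if 2^K - 1 is added to negative dividends before the arithmetic shift. A branch-free 32-bit sketch with illustrative names; the CMov form instead selects N or N + (2^K - 1) with a conditional move rather than deriving the bias from the sign mask:

    #include <cstdint>
    // Signed divide by 2^K (0 < K < 32), rounding toward zero.
    int32_t sdiv_pow2(int32_t N, unsigned K) {
      int32_t Sign = N >> 31;                                // 0 or -1
      int32_t Bias = (int32_t)((uint32_t)Sign >> (32 - K));  // 2^K - 1 if N < 0
      return (N + Bias) >> K;                                // arithmetic shift
    }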
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
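The split follows schoolbook multiplication on half-width digits. A self-contained sketch of the unsigned 64x64 -> 128-bit case built from 32-bit halves (mirroring the LL/LH/RL/RH operands above; names are illustrative):

    #include <cstdint>
    void mul_lohi_u64(uint64_t A, uint64_t B, uint64_t &Lo, uint64_t &Hi) {
      uint64_t LL = A & 0xffffffff, LH = A >> 32;
      uint64_t RL = B & 0xffffffff, RH = B >> 32;
      uint64_t T  = LL * RL;
      uint64_t L0 = T & 0xffffffff, C = T >> 32;
      T = LH * RL + C;                    // cannot overflow 64 bits
      uint64_t M0 = T & 0xffffffff, M1 = T >> 32;
      T = M0 + LL * RH;                   // cannot overflow 64 bits
      Lo = L0 | (T << 32);
      Hi = LH * RH + M1 + (T >> 32);
    }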
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
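One standard expansion when CTPOP is cheap: smear the most significant set bit into every lower position, then count the zeros that remain above it. A 32-bit sketch (the popcount step uses the GCC/Clang builtin):

    #include <cstdint>
    unsigned ctlz32(uint32_t X) {        // yields 32 for X == 0
      X |= X >> 1;  X |= X >> 2;  X |= X >> 4;
      X |= X >> 8;  X |= X >> 16;        // smear the MSB downward
      return __builtin_popcount(~X);     // leading zeros = ones in the inverse
    }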
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
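A common CTPOP-based form: ~X & (X - 1) keeps exactly the bits below the lowest set bit, so their count is the trailing-zero count. 32-bit sketch:

    #include <cstdint>
    unsigned cttz32(uint32_t X) {                  // yields 32 for X == 0
      return __builtin_popcount(~X & (X - 1));
    }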
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
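The expansion typically bitcasts the operand to an integer and tests the exponent/mantissa fields directly. As one concrete instance, a minimal "is infinity" test against the IEEE binary32 layout (a sketch written without LLVM helpers):

    #include <cstdint>
    #include <cstring>
    bool is_inf_f32(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof Bits);        // bitcast f32 -> i32
      return (Bits & 0x7fffffff) == 0x7f800000;   // all-ones exponent, zero mantissa
    }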
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
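The saturating form clamps out-of-range inputs to the integer extremes and maps NaN to zero. A scalar model of the signed f32 -> i32 case (bounds specific to binary32; illustrative only):

    #include <cstdint>
    int32_t fptosi_sat_f32(float F) {
      if (F != F) return 0;                        // NaN -> 0
      if (F <= -2147483648.0f) return INT32_MIN;   // clamp low
      if (F >= 2147483648.0f)  return INT32_MAX;   // 2^31 is not representable in i32
      return (int32_t)F;                           // in range: plain FP_TO_SINT
    }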
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
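Without a native ABS, the classic branch-free expansion conditionally negates through the sign mask. 32-bit sketch (unsigned arithmetic internally so it stays well defined at INT32_MIN):

    #include <cstdint>
    int32_t abs32(int32_t X) {
      uint32_t M = (uint32_t)(X >> 31);          // all-ones if negative, else 0
      return (int32_t)(((uint32_t)X + M) ^ M);   // ~(X - 1) == -X when negative
    }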
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
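A worked instance of the magic-number technique this builder emits (Hacker's Delight, ch. 10): signed division by 3 becomes a multiply-high plus a sign correction, with no divide instruction. The constant below is specific to divisor 3 at 32 bits:

    #include <cstdint>
    int32_t sdiv3(int32_t N) {
      int32_t Q = (int32_t)(((int64_t)0x55555556 * N) >> 32);  // multiply-high
      return Q + ((uint32_t)Q >> 31);                          // add 1 if Q < 0
    }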
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparisons with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product of LHS and RHS at twice their width.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
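The expansion is the classic parallel bit count: form 2-, 4-, then 8-bit partial sums, and fold the byte sums with a multiply. 32-bit sketch:

    #include <cstdint>
    unsigned popcount32(uint32_t X) {
      X = X - ((X >> 1) & 0x55555555);                 // 2-bit sums
      X = (X & 0x33333333) + ((X >> 2) & 0x33333333);  // 4-bit sums
      X = (X + (X >> 4)) & 0x0f0f0f0f;                 // byte sums
      return (X * 0x01010101) >> 24;                   // total lands in the top byte
    }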
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
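Without a native byte swap, the node lowers to shifts and masks. 32-bit sketch:

    #include <cstdint>
    uint32_t bswap32(uint32_t X) {
      return (X << 24) | ((X & 0xff00) << 8) |
             ((X >> 8) & 0xff00) | (X >> 24);
    }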
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
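The table method isolates the lowest set bit and multiplies by a de Bruijn constant so that the top bits form a unique table index. A 32-bit sketch using the well-known 0x077CB531 sequence (defined for nonzero inputs):

    #include <cstdint>
    unsigned cttz32_table(uint32_t X) {    // requires X != 0
      static const unsigned char Table[32] = {
         0,  1, 28,  2, 29, 14, 24,  3, 30, 22, 20, 15, 25, 17,  4,  8,
        31, 27, 13, 23, 21, 19, 16,  7, 26, 12, 18,  6, 11,  5, 10,  9};
      return Table[((X & -X) * 0x077CB531u) >> 27];
    }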
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn a load of a vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
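A funnel shift concatenates its two operands and shifts the double-width window; the generic expansion reduces that to two single-word shifts and an OR, treating a zero shift specially so no shift-by-width is emitted. Sketch of fshl at width 32:

    #include <cstdint>
    uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned Sh) {
      Sh &= 31;                                // amount is taken modulo the width
      if (Sh == 0) return Hi;                  // avoid the undefined Lo >> 32
      return (Hi << Sh) | (Lo >> (32 - Sh));
    }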
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
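A rotation expands to a pair of opposing shifts; masking the complementary amount keeps the zero-rotate case well defined. 32-bit rotl sketch:

    #include <cstdint>
    uint32_t rotl32(uint32_t X, unsigned Sh) {
      Sh &= 31;
      return (X << Sh) | (X >> ((32 - Sh) & 31));  // (32 - 0) & 31 == 0, so rotl by 0 is X
    }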
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
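The overflow-free expansion never widens: the shared bits contribute fully and the differing bits contribute half, rounded down or up. Unsigned 32-bit sketch of the floor and ceiling forms:

    #include <cstdint>
    uint32_t avg_floor_u32(uint32_t A, uint32_t B) {
      return (A & B) + ((A ^ B) >> 1);   // AVGFLOORU
    }
    uint32_t avg_ceil_u32(uint32_t A, uint32_t B) {
      return (A | B) - ((A ^ B) >> 1);   // AVGCEILU
    }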
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:796
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:296
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:712
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A.
Definition APInt.cpp:3020
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
void stable_sort(R &&Range)
Definition STLExtras.h:2106
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1613
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2544
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1595
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1775
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1632
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:470
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:412
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:113
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:314
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:189
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:268
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:255
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:99
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:164
KnownBits byteSwap() const
Definition KnownBits.h:535
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:302
KnownBits reverseBits() const
Definition KnownBits.h:539
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:246
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:175
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:334
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:324
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:183
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:261
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:61
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything about the extended bits.
Definition KnownBits.h:170
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:299
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
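Taken together, these KnownBits operations form a small transfer-function algebra. A self-contained sketch (all values invented for illustration):

#include <optional>
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static void knownBitsDemo() {
  // A value known to be exactly 12 in 8 bits (0b00001100).
  KnownBits C = KnownBits::makeConstant(APInt(8, 12));

  // Zero extension keeps the new high bits known zero.
  KnownBits Z = C.zext(32);
  unsigned LZ = Z.countMinLeadingZeros(); // 24 new zeros + 4 existing = 28

  // Addition transfer function; NSW/NUW flags can tighten the result.
  KnownBits Sum = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                              /*NUW=*/false, Z, Z);

  // Comparisons return std::nullopt unless the answer is the same for
  // every value consistent with the known bits.
  std::optional<bool> Lt = KnownBits::ult(Z, Sum); // here: true (12 < 24)
  (void)LZ;
  (void)Lt;
}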
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR-level pointer.
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
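A short sketch of how these factory methods are typically combined when a lowering routine splits one stack access into two; MF and FI are assumed to come from the surrounding code:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

static void splitSlotInfo(MachineFunction &MF, int FI) {
  MachinePointerInfo Lo = MachinePointerInfo::getFixedStack(MF, FI);
  MachinePointerInfo Hi = Lo.getWithOffset(8); // second half, 8 bytes in
  unsigned AS = Lo.getAddrSpace();             // IR address space of the slot
  (void)Hi;
  (void)AS;
}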
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
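A tiny sketch of these flag accessors, as a combine might use them to gate a wrap-sensitive rewrite:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool flagsDemo() {
  SDNodeFlags Flags;
  Flags.setNoSignedWrap(true); // e.g. copied from an IR 'add nsw'
  return Flags.hasNoSignedWrap() && !Flags.hasNoUnsignedWrap();
}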
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
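A sketch of consulting the signed magic data, assuming the Magic and ShiftAmount fields from llvm/Support/DivisionByConstantInfo.h; BuildSDIV turns n/7 into roughly mulhs(n, Magic) >> ShiftAmount plus a sign correction:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
using namespace llvm;

static void sdivMagicDemo() {
  SignedDivisionByConstantInfo Magics =
      SignedDivisionByConstantInfo::get(APInt(32, 7));
  (void)Magics.Magic;       // multiplier replacing the divide
  (void)Magics.ShiftAmount; // arithmetic post-shift
}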
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
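A sketch of resolving a matching constraint; OpInfo stands for an operand already parsed by ParseConstraints:

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static unsigned matchedOutput(const TargetLowering::AsmOperandInfo &OpInfo) {
  // A constraint string like "4" means: reuse whatever location was
  // chosen for output operand 4.
  if (OpInfo.isMatchingInputConstraint())
    return OpInfo.getMatchedOperand();
  return ~0u; // not a matching constraint
}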
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
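These setters chain, which is how TargetLowering.cpp itself builds libcalls. A condensed sketch, with every argument assumed to be supplied by the surrounding lowering code:

#include <utility>
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static std::pair<SDValue, SDValue>
emitLibcall(const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &dl,
            SDValue Chain, SDValue Callee, CallingConv::ID CC, Type *RetTy,
            TargetLowering::ArgListTy &&Args) {
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(CC, RetTy, Callee, std::move(Args))
      .setDiscardResult(false);
  return TLI.LowerCallTo(CLI); // {return value, output chain}
}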
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
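AddToWorklist and CommitTargetLoweringOpt are the hand-off points between a target combine and the generic DAGCombiner. A sketch of the usual pattern (names illustrative; the shape matches what targets do in their PerformDAGCombine hooks):

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static bool trySimplify(const TargetLowering &TLI, SDValue Op,
                        const APInt &DemandedBits,
                        TargetLowering::DAGCombinerInfo &DCI) {
  KnownBits Known;
  TargetLowering::TargetLoweringOpt TLO(DCI.DAG, !DCI.isBeforeLegalize(),
                                        !DCI.isBeforeLegalizeOps());
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO))
    return false;
  DCI.CommitTargetLoweringOpt(TLO); // replaces TLO.Old with TLO.New
  return true;
}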
This structure is used to pass arguments to the makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
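A sketch of preparing options for a softened float libcall; the original types are recorded so makeLibCall can recompute extension attributes for the pre-softening signature. The caller is assumed to keep OrigOpsVT alive, since the options store an ArrayRef:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static TargetLowering::MakeLibCallOptions
softenedOptions(ArrayRef<EVT> OrigOpsVT, EVT OrigRetVT) {
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setIsSigned(true)
      .setTypeListBeforeSoften(OrigOpsVT, OrigRetVT);
  return CallOptions;
}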
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to SelectionDAGBuilder.
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.
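The unsigned analogue; the field names below assume the Magic, IsAdd, and PostShift members from llvm/Support/DivisionByConstantInfo.h. BuildUDIV turns n/7 into roughly mulhu plus shifts, with an extra add-based fixup when IsAdd is set:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/DivisionByConstantInfo.h"
using namespace llvm;

static void udivMagicDemo() {
  UnsignedDivisionByConstantInfo Magics =
      UnsignedDivisionByConstantInfo::get(APInt(32, 7), /*LeadingZeros=*/0);
  (void)Magics.Magic;     // multiplier replacing the divide
  (void)Magics.IsAdd;     // whether the add-based fixup sequence is needed
  (void)Magics.PostShift; // final logical shift
}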