LLVM 23.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
47
48// Define the virtual destructor out-of-line for build efficiency.
50
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
58
59/// Check whether a given call node is in tail position within its function. If
59/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): the first line of this signature (original line 61) is missing
// from this rendering; the visible tail takes an out-parameter Chain.
62 SDValue &Chain) const {
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore following attributes because they don't affect the
71 // call sequence.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Attr);
78
// Any attribute left after stripping the call-sequence-neutral ones above
// makes tail-call formation unsafe.
79 if (CallerAttrs.hasAttributes())
80 return false;
81
82 // It's not safe to eliminate the sign / zero extension of the return value.
// NOTE(review): this ZExt/SExt check is unreachable — hasAttributes() above
// already returned false for any remaining attribute. Verify against upstream.
83 if (CallerAttrs.contains(Attribute::ZExt) ||
84 CallerAttrs.contains(Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
// Returns true iff every outgoing argument assigned to a callee-saved register
// is the caller's own live-in value for that same register, i.e. the register
// is passed through unchanged. NOTE(review): the first signature line
// (original line 91) is missing from this rendering.
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
// Stack-assigned arguments cannot clobber callee-saved registers.
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
// Look through an AssertZext wrapper before matching the CopyFromReg.
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
111 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
112 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
113 return false;
114 }
115 return true;
116}
117
118/// Set CallLoweringInfo attribute flags based on a call instruction
119/// and called function attributes.
// NOTE(review): the first signature line (original line 120) is missing from
// this rendering; ArgIdx selects which call parameter's attributes to copy.
121 unsigned ArgIdx) {
122 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
123 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
124 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
125 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
126 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
127 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
128 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
129 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
130 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
131 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
132 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
133 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
134 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
135 Alignment = Call->getParamStackAlign(ArgIdx);
136 IndirectType = nullptr;
// NOTE(review): the assert's condition (original line 137) is missing from
// this rendering; only its message text is visible below.
138 "multiple ABI attributes?");
// For indirect-passing ABIs, record the pointee type the attribute implies.
139 if (IsByVal) {
140 IndirectType = Call->getParamByValType(ArgIdx);
// Prefer an explicit stack alignment; otherwise fall back to the param align.
141 if (!Alignment)
142 Alignment = Call->getParamAlign(ArgIdx);
143 }
144 if (IsPreallocated)
145 IndirectType = Call->getParamPreallocatedType(ArgIdx);
146 if (IsInAlloca)
147 IndirectType = Call->getParamInAllocaType(ArgIdx);
148 if (IsSRet)
149 IndirectType = Call->getParamStructRetType(ArgIdx);
150}
151
152/// Generate a libcall taking the given operands as arguments and returning a
153/// result of type RetVT.
// Returns {result, output chain}. NOTE(review): several original lines are
// missing from this rendering (e.g. 156: the RetVT/Ops parameters; 165: the
// ArgListTy declaration; 184, 195, 201, 210-211).
154std::pair<SDValue, SDValue>
155TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
157 MakeLibCallOptions CallOptions, const SDLoc &dl,
158 SDValue InChain) const {
159 if (LibcallImpl == RTLIB::Unsupported)
160 reportFatalInternalError("unsupported library call operation");
161
// Default to hanging the call off the DAG entry node when no chain is given.
162 if (!InChain)
163 InChain = DAG.getEntryNode();
164
166 Args.reserve(Ops.size());
167
168 ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
169 for (unsigned i = 0; i < Ops.size(); ++i) {
170 SDValue NewOp = Ops[i];
// Use the per-operand IR type override when supplied, else derive from the VT.
171 Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
172 ? OpsTypeOverrides[i]
173 : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
174 TargetLowering::ArgListEntry Entry(NewOp, Ty);
175 if (CallOptions.IsSoften)
176 Entry.OrigTy =
177 CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());
178
179 Entry.IsSExt =
180 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
181 Entry.IsZExt = !Entry.IsSExt;
182
// NOTE(review): the second half of this condition (original line 184) is
// missing from this rendering.
183 if (CallOptions.IsSoften &&
185 Entry.IsSExt = Entry.IsZExt = false;
186 }
187 Args.push_back(Entry);
188 }
189
190 SDValue Callee =
191 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
192
193 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
194 Type *OrigRetTy = RetTy;
196 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
197 bool zeroExtend = !signExtend;
198
// When softening, remember the pre-soften return type and drop extensions.
199 if (CallOptions.IsSoften) {
200 OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
202 signExtend = zeroExtend = false;
203 }
204
205 CLI.setDebugLoc(dl)
206 .setChain(InChain)
207 .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
208 Callee, std::move(Args))
209 .setNoReturn(CallOptions.DoesNotReturn)
212 .setSExtResult(signExtend)
213 .setZExtResult(zeroExtend);
214 return LowerCallTo(CLI);
215}
216
// Choose a sequence of value types (appended to MemOps) that covers a memory
// operation of Op.size() bytes without exceeding Limit stores, honoring
// alignment constraints. Returns false if no legal sequence fits.
// NOTE(review): the first signature line (original line 217) and several body
// lines (235, 262-263, 266, 287, 289) are missing from this rendering.
218 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
219 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
220 const AttributeList &FuncAttributes, EVT *LargestVT) const {
// Reject memcpys whose source is less aligned than the fixed destination
// unless the caller imposed no limit.
221 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
222 Op.getSrcAlign() < Op.getDstAlign())
223 return false;
224
225 EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
226
227 if (VT == MVT::Other) {
228 // Use the largest integer type whose alignment constraints are satisfied.
229 // We only need to check DstAlign here as SrcAlign is always greater or
230 // equal to DstAlign (or zero).
231 VT = MVT::LAST_INTEGER_VALUETYPE;
232 if (Op.isFixedDstAlign())
233 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
234 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
236 assert(VT.isInteger());
237
238 // Find the largest legal integer type.
239 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
240 while (!isTypeLegal(LVT))
241 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
242 assert(LVT.isInteger());
243
244 // If the type we've chosen is larger than the largest legal integer type
245 // then use that instead.
246 if (VT.bitsGT(LVT))
247 VT = LVT;
248 }
249
250 unsigned NumMemOps = 0;
251 uint64_t Size = Op.size();
252 while (Size) {
253 unsigned VTSize = VT.getSizeInBits() / 8;
// Shrink the operation type until it no longer overshoots the remainder.
254 while (VTSize > Size) {
255 // For now, only use non-vector load / store's for the left-over pieces.
256 EVT NewVT = VT;
257 unsigned NewVTSize;
258
259 bool Found = false;
260 if (VT.isVector() || VT.isFloatingPoint()) {
261 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
264 Found = true;
265 else if (NewVT == MVT::i64 &&
267 isSafeMemOpType(MVT::f64)) {
268 // i64 is usually not legal on 32-bit targets, but f64 may be.
269 NewVT = MVT::f64;
270 Found = true;
271 }
272 }
273
// Otherwise step down through the integer types until one is safe.
274 if (!Found) {
275 do {
276 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
277 if (NewVT == MVT::i8)
278 break;
279 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
280 }
281 NewVTSize = NewVT.getSizeInBits() / 8;
282
283 // If the new VT cannot cover all of the remaining bits, then consider
284 // issuing a (or a pair of) unaligned and overlapping load / store.
285 unsigned Fast;
286 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
288 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
290 Fast)
291 VTSize = Size;
292 else {
293 VT = NewVT;
294 VTSize = NewVTSize;
295 }
296 }
297
298 if (++NumMemOps > Limit)
299 return false;
300
301 MemOps.push_back(VT);
302 Size -= VTSize;
303 }
304
305 return true;
306}
307
308/// Soften the operands of a comparison. This code is shared among BR_CC,
309/// SELECT_CC, and SETCC handlers.
// Chain-less convenience overload: forwards to the full version with a null
// chain. NOTE(review): the first signature line (original line 310) is missing
// from this rendering.
311 SDValue &NewLHS, SDValue &NewRHS,
312 ISD::CondCode &CCCode,
313 const SDLoc &dl, const SDValue OldLHS,
314 const SDValue OldRHS) const {
315 SDValue Chain;
316 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
317 OldRHS, Chain);
318}
319
// Full soft-float comparison expansion: maps the FP condition code to one or
// two RTLIB comparison libcalls, rewrites NewLHS/NewRHS into integer setcc
// operands, and updates Chain. NOTE(review): the first signature line
// (original line 320) and a few body lines (423, 425, 435, 451, 460) are
// missing from this rendering.
321 SDValue &NewLHS, SDValue &NewRHS,
322 ISD::CondCode &CCCode,
323 const SDLoc &dl, const SDValue OldLHS,
324 const SDValue OldRHS,
325 SDValue &Chain,
326 bool IsSignaling) const {
327 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
328 // not supporting it. We can update this code when libgcc provides such
329 // functions.
330
331 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
332 && "Unsupported setcc type!");
333
334 // Expand into one or more soft-fp libcall(s).
335 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
336 bool ShouldInvertCC = false;
// Select the primary (LC1) and, for SETONE/SETUEQ, secondary (LC2) libcalls.
337 switch (CCCode) {
338 case ISD::SETEQ:
339 case ISD::SETOEQ:
340 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
341 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
342 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
343 break;
344 case ISD::SETNE:
345 case ISD::SETUNE:
346 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
347 (VT == MVT::f64) ? RTLIB::UNE_F64 :
348 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
349 break;
350 case ISD::SETGE:
351 case ISD::SETOGE:
352 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
353 (VT == MVT::f64) ? RTLIB::OGE_F64 :
354 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
355 break;
356 case ISD::SETLT:
357 case ISD::SETOLT:
358 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
359 (VT == MVT::f64) ? RTLIB::OLT_F64 :
360 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
361 break;
362 case ISD::SETLE:
363 case ISD::SETOLE:
364 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
365 (VT == MVT::f64) ? RTLIB::OLE_F64 :
366 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
367 break;
368 case ISD::SETGT:
369 case ISD::SETOGT:
370 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
371 (VT == MVT::f64) ? RTLIB::OGT_F64 :
372 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
373 break;
374 case ISD::SETO:
// SETO is the inverse of SETUO, so reuse the unordered libcall and invert.
375 ShouldInvertCC = true;
376 [[fallthrough]];
377 case ISD::SETUO:
378 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
379 (VT == MVT::f64) ? RTLIB::UO_F64 :
380 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
381 break;
382 case ISD::SETONE:
383 // SETONE = O && UNE
384 ShouldInvertCC = true;
385 [[fallthrough]];
386 case ISD::SETUEQ:
387 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
388 (VT == MVT::f64) ? RTLIB::UO_F64 :
389 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
390 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
391 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
392 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
393 break;
394 default:
395 // Invert CC for unordered comparisons
396 ShouldInvertCC = true;
397 switch (CCCode) {
398 case ISD::SETULT:
399 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
400 (VT == MVT::f64) ? RTLIB::OGE_F64 :
401 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
402 break;
403 case ISD::SETULE:
404 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
405 (VT == MVT::f64) ? RTLIB::OGT_F64 :
406 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
407 break;
408 case ISD::SETUGT:
409 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
410 (VT == MVT::f64) ? RTLIB::OLE_F64 :
411 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
412 break;
413 case ISD::SETUGE:
414 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
415 (VT == MVT::f64) ? RTLIB::OLT_F64 :
416 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
417 break;
418 default: llvm_unreachable("Do not know how to soften this setcc!");
419 }
420 }
421
422 // Use the target specific return value for comparison lib calls.
424 SDValue Ops[2] = {NewLHS, NewRHS};
426 EVT OpsVT[2] = { OldLHS.getValueType(),
427 OldRHS.getValueType() };
428 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
// Emit the primary comparison libcall; its result is compared against 0.
429 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
430 NewLHS = Call.first;
431 NewRHS = DAG.getConstant(0, dl, RetVT);
432
433 RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
434 if (LC1Impl == RTLIB::Unsupported) {
436 "no libcall available to soften floating-point compare");
437 }
438
439 CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
440 if (ShouldInvertCC) {
441 assert(RetVT.isInteger());
442 CCCode = getSetCCInverse(CCCode, RetVT);
443 }
444
445 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
446 // Update Chain.
447 Chain = Call.second;
448 } else {
// Two-libcall case (SETONE/SETUEQ): combine the results of both calls.
449 RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
450 if (LC2Impl == RTLIB::Unsupported) {
452 "no libcall available to soften floating-point compare");
453 }
454
455 assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
456 "unordered call should be simple boolean");
457
458 EVT SetCCVT =
459 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
461 NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
462 DAG.getValueType(MVT::i1));
463 }
464
465 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
466 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
467 CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
468 if (ShouldInvertCC)
469 CCCode = getSetCCInverse(CCCode, RetVT);
470 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
// Join the chains of both libcalls when a chain is being tracked.
471 if (Chain)
472 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
473 Call2.second);
474 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
475 Tmp.getValueType(), Tmp, NewLHS);
476 NewRHS = SDValue();
477 }
478}
479
480/// Return the entry encoding for a jump table in the current function. The
481/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): most of this function's body (original lines 482-488,
// including the signature and both return statements) is missing from this
// rendering; only its comments survive.
483 // In non-pic modes, just use the address of a block.
486
487 // Otherwise, use a label difference.
489}
490
// Default PIC jump-table reloc base: the table value itself, unchanged.
// NOTE(review): the first signature line (original line 491) is missing from
// this rendering; presumably this is getPICJumpTableRelocBase (the MCExpr
// variant below says it mirrors it) — verify against upstream.
492 SelectionDAG &DAG) const {
493 return Table;
494}
495
496/// This returns the relocation base for the given PIC jumptable, the same as
497/// getPICJumpTableRelocBase, but as an MCExpr.
// NOTE(review): the second line of this signature (original line 499, carrying
// the function name and first parameter) is missing from this rendering.
498const MCExpr *
500 unsigned JTI,MCContext &Ctx) const{
501 // The normal PIC reloc base is the label at the start of the jump table.
502 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
503}
504
// Build the indirect branch (BRIND) for a lowered jump table, optionally
// threading jump-table debug info onto the chain first.
// NOTE(review): the first signature line (original line 505) and the `if`
// condition guarding the debug-info emission (original line 510) are missing
// from this rendering.
506 SDValue Addr, int JTI,
507 SelectionDAG &DAG) const {
508 SDValue Chain = Value;
509 // Jump table debug info is only needed if CodeView is enabled.
511 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
512 }
513 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
514}
515
// Returns true when a GlobalAddress plus constant offset can be folded into a
// single addressing computation: the symbol must be DSO-local and the code
// must not require a PIC base register. NOTE(review): the signature line
// (original line 517) and the PIC-mode condition (original line 527) are
// missing from this rendering.
516bool
518 const TargetMachine &TM = getTargetMachine();
519 const GlobalValue *GV = GA->getGlobal();
520
521 // If the address is not even local to this DSO we will have to load it from
522 // a got and then add the offset.
523 if (!TM.shouldAssumeDSOLocal(GV))
524 return false;
525
526 // If the code is position independent we will have to add a base register.
528 return false;
529
530 // Otherwise we can do it.
531 return true;
532}
533
534//===----------------------------------------------------------------------===//
535// Optimization Methods
536//===----------------------------------------------------------------------===//
537
538/// If the specified instruction has a constant integer operand and there are
539/// bits set in that constant that are not demanded, then clear those bits and
540/// return true.
// NOTE(review): the first signature line (original line 541) is missing from
// this rendering.
542 const APInt &DemandedBits,
543 const APInt &DemandedElts,
544 TargetLoweringOpt &TLO) const {
545 SDLoc DL(Op);
546 unsigned Opcode = Op.getOpcode();
547
548 // Early-out if we've ended up calling an undemanded node, leave this to
549 // constant folding.
550 if (DemandedBits.isZero() || DemandedElts.isZero())
551 return false;
552
553 // Do target-specific constant optimization.
554 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
555 return TLO.New.getNode();
556
557 // FIXME: ISD::SELECT, ISD::SELECT_CC
558 switch (Opcode) {
559 default:
560 break;
561 case ISD::XOR:
562 case ISD::AND:
563 case ISD::OR: {
564 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// Opaque constants must be preserved as-is.
565 if (!Op1C || Op1C->isOpaque())
566 return false;
567
568 // If this is a 'not' op, don't touch it because that's a canonical form.
569 const APInt &C = Op1C->getAPIntValue();
570 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
571 return false;
572
// Rebuild the node with the constant masked down to the demanded bits.
573 if (!C.isSubsetOf(DemandedBits)) {
574 EVT VT = Op.getValueType();
575 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
576 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
577 Op->getFlags());
578 return TLO.CombineTo(Op, NewOp);
579 }
580
581 break;
582 }
583 }
584
585 return false;
586}
587
// Convenience overload: derive a DemandedElts mask covering every element
// (or a single bit for scalars) and forward to the main implementation.
// NOTE(review): the first signature line (original line 588) and the
// all-ones-elements expression (original line 593) are missing from this
// rendering.
589 const APInt &DemandedBits,
590 TargetLoweringOpt &TLO) const {
591 EVT VT = Op.getValueType();
592 APInt DemandedElts = VT.isVector()
594 : APInt(1, 1);
595 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
596}
597
598/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
599/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
600/// but it could be generalized for targets with other types of implicit
601/// widening casts.
// NOTE(review): the first signature line (original line 602) and the
// else-branch of the SDNodeFlags initializer (original line 639) are missing
// from this rendering.
603 const APInt &DemandedBits,
604 TargetLoweringOpt &TLO) const {
605 assert(Op.getNumOperands() == 2 &&
606 "ShrinkDemandedOp only supports binary operators!");
607 assert(Op.getNode()->getNumValues() == 1 &&
608 "ShrinkDemandedOp only supports nodes with one result!");
609
610 EVT VT = Op.getValueType();
611 SelectionDAG &DAG = TLO.DAG;
612 SDLoc dl(Op);
613
614 // Early return, as this function cannot handle vector types.
615 if (VT.isVector())
616 return false;
617
618 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
619 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
620 "ShrinkDemandedOp only supports operands that have the same size!");
621
622 // Don't do this if the node has another user, which may require the
623 // full value.
624 if (!Op.getNode()->hasOneUse())
625 return false;
626
627 // Search for the smallest integer type with free casts to and from
628 // Op's type. For expedience, just check power-of-2 integer types.
629 unsigned DemandedSize = DemandedBits.getActiveBits();
630 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
631 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
632 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
633 if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
634 // We found a type with free casts.
635
636 // If the operation has the 'disjoint' flag, then the
637 // operands on the new node are also disjoint.
638 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
640 unsigned Opcode = Op.getOpcode();
641 if (Opcode == ISD::PTRADD) {
642 // It isn't a ptradd anymore if it doesn't operate on the entire
643 // pointer.
644 Opcode = ISD::ADD;
645 }
// Truncate both operands, perform the op narrow, then widen the result.
646 SDValue X = DAG.getNode(
647 Opcode, dl, SmallVT,
648 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
649 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
650 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
651 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
652 return TLO.CombineTo(Op, Z);
653 }
654 }
655 return false;
656}
657
// DAGCombiner-facing wrapper: runs SimplifyDemandedBits with a
// TargetLoweringOpt built from the combiner state and requeues the node when
// something changed. NOTE(review): the first signature line (original line
// 658) and one statement inside the if (original line 668) are missing from
// this rendering.
659 DAGCombinerInfo &DCI) const {
660 SelectionDAG &DAG = DCI.DAG;
661 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
662 !DCI.isBeforeLegalizeOps());
663 KnownBits Known;
664
665 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
666 if (Simplified) {
667 DCI.AddToWorklist(Op.getNode());
669 }
670 return Simplified;
671}
672
// Same as the wrapper above, but with an explicit DemandedElts mask.
// NOTE(review): the first signature line (original line 673) and one
// statement inside the if (original line 685) are missing from this rendering.
674 const APInt &DemandedElts,
675 DAGCombinerInfo &DCI) const {
676 SelectionDAG &DAG = DCI.DAG;
677 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
678 !DCI.isBeforeLegalizeOps());
679 KnownBits Known;
680
681 bool Simplified =
682 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
683 if (Simplified) {
684 DCI.AddToWorklist(Op.getNode());
686 }
687 return Simplified;
688}
689
// TLO-based entry point without an explicit DemandedElts mask: demand every
// fixed-vector lane, or a single broadcast bit for scalable vectors/scalars.
// NOTE(review): the first signature line (original line 690), one parameter
// line (692), and the all-ones-elements expression (701) are missing from this
// rendering.
691 KnownBits &Known,
693 unsigned Depth,
694 bool AssumeSingleUse) const {
695 EVT VT = Op.getValueType();
696
697 // Since the number of lanes in a scalable vector is unknown at compile time,
698 // we track one bit which is implicitly broadcast to all lanes. This means
699 // that all lanes in a scalable vector are considered demanded.
700 APInt DemandedElts = VT.isFixedLengthVector()
702 : APInt(1, 1);
703 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
704 AssumeSingleUse);
705}
706
707// TODO: Under what circumstances can we create nodes? Constant folding?
// Returns an existing SDValue that produces the same demanded bits/elements as
// Op without creating new nodes (safe even when Op has multiple uses), or a
// null SDValue if no such simplification is found. NOTE(review): the first
// signature line (original line 708) and several interior lines (714, 743,
// 761, 779, 875-877, 883, 888, 894, 902-904, 920, 933, 948, 978) are missing
// from this rendering.
709 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
710 SelectionDAG &DAG, unsigned Depth) const {
711 EVT VT = Op.getValueType();
712
713 // Limit search depth.
715 return SDValue();
716
717 // Ignore UNDEFs.
718 if (Op.isUndef())
719 return SDValue();
720
721 // Not demanding any bits/elts from Op.
722 if (DemandedBits == 0 || DemandedElts == 0)
723 return DAG.getUNDEF(VT);
724
725 bool IsLE = DAG.getDataLayout().isLittleEndian();
726 unsigned NumElts = DemandedElts.getBitWidth();
727 unsigned BitWidth = DemandedBits.getBitWidth();
728 KnownBits LHSKnown, RHSKnown;
729 switch (Op.getOpcode()) {
730 case ISD::BITCAST: {
731 if (VT.isScalableVector())
732 return SDValue();
733
734 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
735 EVT SrcVT = Src.getValueType();
736 EVT DstVT = Op.getValueType();
737 if (SrcVT == DstVT)
738 return Src;
739
740 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
741 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
// Same element width: translate the demanded masks through the bitcast.
742 if (NumSrcEltBits == NumDstEltBits)
744 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
745 return DAG.getBitcast(DstVT, V);
746
// Narrow source elements combining into wider destination elements.
747 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
748 unsigned Scale = NumDstEltBits / NumSrcEltBits;
749 unsigned NumSrcElts = SrcVT.getVectorNumElements();
750 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
751 for (unsigned i = 0; i != Scale; ++i) {
752 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
753 unsigned BitOffset = EltOffset * NumSrcEltBits;
754 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
755 }
756 // Recursive calls below may turn not demanded elements into poison, so we
757 // need to demand all smaller source elements that maps to a demanded
758 // destination element.
759 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
760
762 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
763 return DAG.getBitcast(DstVT, V);
764 }
765
766 // TODO - bigendian once we have test coverage.
// Wide source elements splitting into narrower destination elements.
767 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
768 unsigned Scale = NumSrcEltBits / NumDstEltBits;
769 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
770 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
771 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
772 for (unsigned i = 0; i != NumElts; ++i)
773 if (DemandedElts[i]) {
774 unsigned Offset = (i % Scale) * NumDstEltBits;
775 DemandedSrcBits.insertBits(DemandedBits, Offset);
776 DemandedSrcElts.setBit(i / Scale);
777 }
778
780 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
781 return DAG.getBitcast(DstVT, V);
782 }
783
784 break;
785 }
786 case ISD::AND: {
787 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
788 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
789
790 // If all of the demanded bits are known 1 on one side, return the other.
791 // These bits cannot contribute to the result of the 'and' in this
792 // context.
793 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
794 return Op.getOperand(0);
795 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
796 return Op.getOperand(1);
797 break;
798 }
799 case ISD::OR: {
800 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
801 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
802
803 // If all of the demanded bits are known zero on one side, return the
804 // other. These bits cannot contribute to the result of the 'or' in this
805 // context.
806 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
807 return Op.getOperand(0);
808 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
809 return Op.getOperand(1);
810 break;
811 }
812 case ISD::XOR: {
813 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
814 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
815
816 // If all of the demanded bits are known zero on one side, return the
817 // other.
818 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
819 return Op.getOperand(0);
820 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
821 return Op.getOperand(1);
822 break;
823 }
824 case ISD::ADD: {
// Adding a known-zero operand is an identity on all lanes considered.
825 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
826 if (RHSKnown.isZero())
827 return Op.getOperand(0);
828
829 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
830 if (LHSKnown.isZero())
831 return Op.getOperand(1);
832 break;
833 }
834 case ISD::SHL: {
835 // If we are only demanding sign bits then we can use the shift source
836 // directly.
837 if (std::optional<unsigned> MaxSA =
838 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
839 SDValue Op0 = Op.getOperand(0);
840 unsigned ShAmt = *MaxSA;
841 unsigned NumSignBits =
842 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
843 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
844 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
845 return Op0;
846 }
847 break;
848 }
849 case ISD::SRL: {
850 // If we are only demanding sign bits then we can use the shift source
851 // directly.
852 if (std::optional<unsigned> MaxSA =
853 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
854 SDValue Op0 = Op.getOperand(0);
855 unsigned ShAmt = *MaxSA;
856 // Must already be signbits in DemandedBits bounds, and can't demand any
857 // shifted in zeroes.
858 if (DemandedBits.countl_zero() >= ShAmt) {
859 unsigned NumSignBits =
860 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
861 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
862 return Op0;
863 }
864 }
865 break;
866 }
867 case ISD::SETCC: {
868 SDValue Op0 = Op.getOperand(0);
869 SDValue Op1 = Op.getOperand(1);
870 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
871 // If (1) we only need the sign-bit, (2) the setcc operands are the same
872 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
873 // -1, we may be able to bypass the setcc.
874 if (DemandedBits.isSignMask() &&
878 // If we're testing X < 0, then this compare isn't needed - just use X!
879 // FIXME: We're limiting to integer types here, but this should also work
880 // if we don't care about FP signed-zero. The use of SETLT with FP means
881 // that we don't care about NaNs.
882 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
884 return Op0;
885 }
886 break;
887 }
// NOTE(review): the case label (original line 888, presumably
// ISD::SIGN_EXTEND_INREG given the body) is missing from this rendering.
889 // If none of the extended bits are demanded, eliminate the sextinreg.
890 SDValue Op0 = Op.getOperand(0);
891 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
892 unsigned ExBits = ExVT.getScalarSizeInBits();
893 if (DemandedBits.getActiveBits() <= ExBits &&
895 return Op0;
896 // If the input is already sign extended, just drop the extension.
897 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
898 if (NumSignBits >= (BitWidth - ExBits + 1))
899 return Op0;
900 break;
901 }
905 if (VT.isScalableVector())
906 return SDValue();
907
908 // If we only want the lowest element and none of extended bits, then we can
909 // return the bitcasted source vector.
910 SDValue Src = Op.getOperand(0);
911 EVT SrcVT = Src.getValueType();
912 EVT DstVT = Op.getValueType();
913 if (IsLE && DemandedElts == 1 &&
914 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
915 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
916 return DAG.getBitcast(DstVT, Src);
917 }
918 break;
919 }
// NOTE(review): case label (original line 920) missing; body handles an
// insert-element style node.
921 if (VT.isScalableVector())
922 return SDValue();
923
924 // If we don't demand the inserted element, return the base vector.
925 SDValue Vec = Op.getOperand(0);
926 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
927 EVT VecVT = Vec.getValueType();
928 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
929 !DemandedElts[CIdx->getZExtValue()])
930 return Vec;
931 break;
932 }
// NOTE(review): case label (original line 933) missing; body handles an
// insert-subvector style node.
934 if (VT.isScalableVector())
935 return SDValue();
936
937 SDValue Vec = Op.getOperand(0);
938 SDValue Sub = Op.getOperand(1);
939 uint64_t Idx = Op.getConstantOperandVal(2);
940 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
941 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
942 // If we don't demand the inserted subvector, return the base vector.
943 if (DemandedSubElts == 0)
944 return Vec;
945 break;
946 }
947 case ISD::VECTOR_SHUFFLE: {
949 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
950
951 // If all the demanded elts are from one operand and are inline,
952 // then we can use the operand directly.
953 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
954 for (unsigned i = 0; i != NumElts; ++i) {
955 int M = ShuffleMask[i];
956 if (M < 0 || !DemandedElts[i])
957 continue;
958 AllUndef = false;
959 IdentityLHS &= (M == (int)i);
960 IdentityRHS &= ((M - NumElts) == i);
961 }
962
963 if (AllUndef)
964 return DAG.getUNDEF(Op.getValueType());
965 if (IdentityLHS)
966 return Op.getOperand(0);
967 if (IdentityRHS)
968 return Op.getOperand(1);
969 break;
970 }
971 default:
972 // TODO: Probably okay to remove after audit; here to reduce change size
973 // in initial enablement patch for scalable vectors
974 if (VT.isScalableVector())
975 return SDValue();
976
// Give targets a chance to simplify their own (post-BUILTIN_OP_END) nodes.
977 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
979 Op, DemandedBits, DemandedElts, DAG, Depth))
980 return V;
981 break;
982 }
983 return SDValue();
984}
985
// Wrapper without an explicit DemandedElts mask: demand every fixed-vector
// lane, or a single broadcast bit otherwise. NOTE(review): the first two
// signature lines (original lines 986-987) and the all-ones-elements
// expression (994) are missing from this rendering.
988 unsigned Depth) const {
989 EVT VT = Op.getValueType();
990 // Since the number of lanes in a scalable vector is unknown at compile time,
991 // we track one bit which is implicitly broadcast to all lanes. This means
992 // that all lanes in a scalable vector are considered demanded.
993 APInt DemandedElts = VT.isFixedLengthVector()
995 : APInt(1, 1);
996 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
997 Depth);
998}
999
// Wrapper without an explicit DemandedBits mask: demand every bit of the
// scalar value. NOTE(review): the first signature line (original line 1000)
// is missing from this rendering.
1001 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1002 unsigned Depth) const {
1003 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
1004 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1005 Depth);
1006}
1007
1008// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1009// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// On success, returns the replacement node (an AVG* in the narrowest feasible
// type, re-extended/truncated to Op's type); otherwise returns SDValue().
1012                                   const TargetLowering &TLI,
1013                                   const APInt &DemandedBits,
1014                                   const APInt &DemandedElts, unsigned Depth) {
1015   assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1016          "SRL or SRA node is required here!");
1017   // Is the right shift using an immediate value of 1?
1018   ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1019   if (!N1C || !N1C->isOne())
1020     return SDValue();
1021
1022   // We are looking for an avgfloor
1023   // add(ext, ext)
1024   // or one of these as a avgceil
1025   // add(add(ext, ext), 1)
1026   // add(add(ext, 1), ext)
1027   // add(ext, add(ext, 1))
1028   SDValue Add = Op.getOperand(0);
1029   if (Add.getOpcode() != ISD::ADD)
1030     return SDValue();
1031
1032   SDValue ExtOpA = Add.getOperand(0);
1033   SDValue ExtOpB = Add.getOperand(1);
// Add2 records the inner add of a matched avgceil pattern; it is re-checked
// later for nuw/nsw when falling back to the full-width transform.
1034   SDValue Add2;
// Helper: given three candidate values, if either of the last two is a
// (splat) constant 1, bind the remaining pair as the averaged operands
// (ExtOpA/ExtOpB) and remember the inner add node A in Add2.
1035   auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1036     ConstantSDNode *ConstOp;
1037     if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1038         ConstOp->isOne()) {
1039       ExtOpA = Op1;
1040       ExtOpB = Op3;
1041       Add2 = A;
1042       return true;
1043     }
1044     if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1045         ConstOp->isOne()) {
1046       ExtOpA = Op1;
1047       ExtOpB = Op2;
1048       Add2 = A;
1049       return true;
1050     }
1051     return false;
1052   };
// avgceil requires the +1: try both commutations where one side of the outer
// add is itself an add involving the constant 1.
1053   bool IsCeil =
1054       (ExtOpA.getOpcode() == ISD::ADD &&
1055        MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1056       (ExtOpB.getOpcode() == ISD::ADD &&
1057        MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1058
1059   // If the shift is signed (sra):
1060   // - Needs >= 2 sign bit for both operands.
1061   // - Needs >= 2 zero bits.
1062   // If the shift is unsigned (srl):
1063   // - Needs >= 1 zero bit for both operands.
1064   // - Needs 1 demanded bit zero and >= 2 sign bits.
1065   SelectionDAG &DAG = TLO.DAG;
1066   unsigned ShiftOpc = Op.getOpcode();
1067   bool IsSigned = false;
1068   unsigned KnownBits;
1069   unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1070   unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
// NumSigned counts the *redundant* sign bits shared by both operands (the
// "- 1" excludes the value's own sign bit), so the ">= 1" tests below match
// the ">= 2 sign bits" requirements stated in the comment above.
1071   unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1072   unsigned NumZeroA =
1073       DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1074   unsigned NumZeroB =
1075       DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1076   unsigned NumZero = std::min(NumZeroA, NumZeroB);
1077
// Decide signed vs unsigned AVG and how many top bits are known redundant
// (KnownBits), which bounds how far the operation can be narrowed below.
1078   switch (ShiftOpc) {
1079   default:
1080     llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1081   case ISD::SRA: {
// Prefer the unsigned form when zero bits dominate: it permits a narrower
// type than the signed form would.
1082     if (NumZero >= 2 && NumSigned < NumZero) {
1083       IsSigned = false;
1084       KnownBits = NumZero;
1085       break;
1086     }
1087     if (NumSigned >= 1) {
1088       IsSigned = true;
1089       KnownBits = NumSigned;
1090       break;
1091     }
1092     return SDValue();
1093   }
1094   case ISD::SRL: {
1095     if (NumZero >= 1 && NumSigned < NumZero) {
1096       IsSigned = false;
1097       KnownBits = NumZero;
1098       break;
1099     }
// A signed AVG can stand in for srl only if the sign bit of the result is
// not demanded.
1100     if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1101       IsSigned = true;
1102       KnownBits = NumSigned;
1103       break;
1104     }
1105     return SDValue();
1106   }
1107   }
1108
1109   unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1110                            : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1111
1112   // Find the smallest power-2 type that is legal for this vector size and
1113   // operation, given the original type size and the number of known sign/zero
1114   // bits.
1115   EVT VT = Op.getValueType();
// Never narrow below i8; bit_ceil rounds the width up to a power of two.
1116   unsigned MinWidth =
1117       std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1118   EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
// NOTE(review): the guard condition preceding this early-out (original line
// 1119) is elided in this excerpt -- confirm against the full source.
1120     return SDValue();
1121   if (VT.isVector())
1122     NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1123   if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1124     // If we could not transform, and (both) adds are nuw/nsw, we can use the
1125     // larger type size to do the transform.
1126     if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1127       return SDValue();
// Full-width fallback is only sound if neither add (outer, and inner Add2
// when an avgceil was matched) can wrap in the chosen signedness.
1128     if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1129                                Add.getOperand(1)) &&
1130         (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1131                                          Add2.getOperand(1))))
1132       NVT = VT;
1133     else
1134       return SDValue();
1135   }
1136
1137   // Don't create a AVGFLOOR node with a scalar constant unless its legal as
1138   // this is likely to stop other folds (reassociation, value tracking etc.)
1139   if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1140       (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1141     return SDValue();
1142
// Build the AVG in the narrow type and re-extend (sext/zext per IsSigned) or
// truncate back to the original type.
1143   SDLoc DL(Op);
1144   SDValue ResultAVG =
1145       DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1146                   DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1147   return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1148}
1149
1150/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1151/// result of Op are ever used downstream. If we can use this information to
1152/// simplify Op, create a new simplified DAG node and return true, returning the
1153/// original and new nodes in Old and New. Otherwise, analyze the expression and
1154/// return a mask of Known bits for the expression (used to simplify the
1155/// caller). The Known bits may only be accurate for those bits in the
1156/// OriginalDemandedBits and OriginalDemandedElts.
1158 SDValue Op, const APInt &OriginalDemandedBits,
1159 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1160 unsigned Depth, bool AssumeSingleUse) const {
1161 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1162 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1163 "Mask size mismatches value type size!");
1164
1165 // Don't know anything.
1166 Known = KnownBits(BitWidth);
1167
1168 EVT VT = Op.getValueType();
1169 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1170 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1171 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1172 "Unexpected vector size");
1173
1174 APInt DemandedBits = OriginalDemandedBits;
1175 APInt DemandedElts = OriginalDemandedElts;
1176 SDLoc dl(Op);
1177
1178 // Undef operand.
1179 if (Op.isUndef())
1180 return false;
1181
1182 // We can't simplify target constants.
1183 if (Op.getOpcode() == ISD::TargetConstant)
1184 return false;
1185
1186 if (Op.getOpcode() == ISD::Constant) {
1187 // We know all of the bits for a constant!
1188 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1189 return false;
1190 }
1191
1192 if (Op.getOpcode() == ISD::ConstantFP) {
1193 // We know all of the bits for a floating point constant!
1195 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1196 return false;
1197 }
1198
1199 // Other users may use these bits.
1200 bool HasMultiUse = false;
1201 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1203 // Limit search depth.
1204 return false;
1205 }
1206 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1208 DemandedElts = APInt::getAllOnes(NumElts);
1209 HasMultiUse = true;
1210 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1211 // Not demanding any bits/elts from Op.
1212 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1213 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1214 // Limit search depth.
1215 return false;
1216 }
1217
1218 KnownBits Known2;
1219 switch (Op.getOpcode()) {
1220 case ISD::SCALAR_TO_VECTOR: {
1221 if (VT.isScalableVector())
1222 return false;
1223 if (!DemandedElts[0])
1224 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1225
1226 KnownBits SrcKnown;
1227 SDValue Src = Op.getOperand(0);
1228 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1229 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1230 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1231 return true;
1232
1233 // Upper elements are undef, so only get the knownbits if we just demand
1234 // the bottom element.
1235 if (DemandedElts == 1)
1236 Known = SrcKnown.anyextOrTrunc(BitWidth);
1237 break;
1238 }
1239 case ISD::BUILD_VECTOR:
1240 // Collect the known bits that are shared by every demanded element.
1241 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1242 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1243 return false; // Don't fall through, will infinitely loop.
1244 case ISD::SPLAT_VECTOR: {
1245 SDValue Scl = Op.getOperand(0);
1246 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1247 KnownBits KnownScl;
1248 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1249 return true;
1250
1251 // Implicitly truncate the bits to match the official semantics of
1252 // SPLAT_VECTOR.
1253 Known = KnownScl.trunc(BitWidth);
1254 break;
1255 }
1256 case ISD::LOAD: {
1257 auto *LD = cast<LoadSDNode>(Op);
1258 if (getTargetConstantFromLoad(LD)) {
1259 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1260 return false; // Don't fall through, will infinitely loop.
1261 }
1262 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1263 // If this is a ZEXTLoad and we are looking at the loaded value.
1264 EVT MemVT = LD->getMemoryVT();
1265 unsigned MemBits = MemVT.getScalarSizeInBits();
1266 Known.Zero.setBitsFrom(MemBits);
1267 return false; // Don't fall through, will infinitely loop.
1268 }
1269 break;
1270 }
1272 if (VT.isScalableVector())
1273 return false;
1274 SDValue Vec = Op.getOperand(0);
1275 SDValue Scl = Op.getOperand(1);
1276 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1277 EVT VecVT = Vec.getValueType();
1278
1279 // If index isn't constant, assume we need all vector elements AND the
1280 // inserted element.
1281 APInt DemandedVecElts(DemandedElts);
1282 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1283 unsigned Idx = CIdx->getZExtValue();
1284 DemandedVecElts.clearBit(Idx);
1285
1286 // Inserted element is not required.
1287 if (!DemandedElts[Idx])
1288 return TLO.CombineTo(Op, Vec);
1289 }
1290
1291 KnownBits KnownScl;
1292 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1293 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1294 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1295 return true;
1296
1297 Known = KnownScl.anyextOrTrunc(BitWidth);
1298
1299 KnownBits KnownVec;
1300 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1301 Depth + 1))
1302 return true;
1303
1304 if (!!DemandedVecElts)
1305 Known = Known.intersectWith(KnownVec);
1306
1307 return false;
1308 }
1309 case ISD::INSERT_SUBVECTOR: {
1310 if (VT.isScalableVector())
1311 return false;
1312 // Demand any elements from the subvector and the remainder from the src its
1313 // inserted into.
1314 SDValue Src = Op.getOperand(0);
1315 SDValue Sub = Op.getOperand(1);
1316 uint64_t Idx = Op.getConstantOperandVal(2);
1317 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1318 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1319 APInt DemandedSrcElts = DemandedElts;
1320 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1321
1322 KnownBits KnownSub, KnownSrc;
1323 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1324 Depth + 1))
1325 return true;
1326 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1327 Depth + 1))
1328 return true;
1329
1330 Known.setAllConflict();
1331 if (!!DemandedSubElts)
1332 Known = Known.intersectWith(KnownSub);
1333 if (!!DemandedSrcElts)
1334 Known = Known.intersectWith(KnownSrc);
1335
1336 // Attempt to avoid multi-use src if we don't need anything from it.
1337 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1338 !DemandedSrcElts.isAllOnes()) {
1340 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1342 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1343 if (NewSub || NewSrc) {
1344 NewSub = NewSub ? NewSub : Sub;
1345 NewSrc = NewSrc ? NewSrc : Src;
1346 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1347 Op.getOperand(2));
1348 return TLO.CombineTo(Op, NewOp);
1349 }
1350 }
1351 break;
1352 }
1354 if (VT.isScalableVector())
1355 return false;
1356 // Offset the demanded elts by the subvector index.
1357 SDValue Src = Op.getOperand(0);
1358 if (Src.getValueType().isScalableVector())
1359 break;
1360 uint64_t Idx = Op.getConstantOperandVal(1);
1361 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1362 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1363
1364 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1365 Depth + 1))
1366 return true;
1367
1368 // Attempt to avoid multi-use src if we don't need anything from it.
1369 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1371 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1372 if (DemandedSrc) {
1373 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1374 Op.getOperand(1));
1375 return TLO.CombineTo(Op, NewOp);
1376 }
1377 }
1378 break;
1379 }
1380 case ISD::CONCAT_VECTORS: {
1381 if (VT.isScalableVector())
1382 return false;
1383 Known.setAllConflict();
1384 EVT SubVT = Op.getOperand(0).getValueType();
1385 unsigned NumSubVecs = Op.getNumOperands();
1386 unsigned NumSubElts = SubVT.getVectorNumElements();
1387 for (unsigned i = 0; i != NumSubVecs; ++i) {
1388 APInt DemandedSubElts =
1389 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1390 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1391 Known2, TLO, Depth + 1))
1392 return true;
1393 // Known bits are shared by every demanded subvector element.
1394 if (!!DemandedSubElts)
1395 Known = Known.intersectWith(Known2);
1396 }
1397 break;
1398 }
1399 case ISD::VECTOR_SHUFFLE: {
1400 assert(!VT.isScalableVector());
1401 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1402
1403 // Collect demanded elements from shuffle operands..
1404 APInt DemandedLHS, DemandedRHS;
1405 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1406 DemandedRHS))
1407 break;
1408
1409 if (!!DemandedLHS || !!DemandedRHS) {
1410 SDValue Op0 = Op.getOperand(0);
1411 SDValue Op1 = Op.getOperand(1);
1412
1413 Known.setAllConflict();
1414 if (!!DemandedLHS) {
1415 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1416 Depth + 1))
1417 return true;
1418 Known = Known.intersectWith(Known2);
1419 }
1420 if (!!DemandedRHS) {
1421 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1422 Depth + 1))
1423 return true;
1424 Known = Known.intersectWith(Known2);
1425 }
1426
1427 // Attempt to avoid multi-use ops if we don't need anything from them.
1429 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1431 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1432 if (DemandedOp0 || DemandedOp1) {
1433 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1434 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1435 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1436 return TLO.CombineTo(Op, NewOp);
1437 }
1438 }
1439 break;
1440 }
1441 case ISD::AND: {
1442 SDValue Op0 = Op.getOperand(0);
1443 SDValue Op1 = Op.getOperand(1);
1444
1445 // If the RHS is a constant, check to see if the LHS would be zero without
1446 // using the bits from the RHS. Below, we use knowledge about the RHS to
1447 // simplify the LHS, here we're using information from the LHS to simplify
1448 // the RHS.
1449 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1450 // Do not increment Depth here; that can cause an infinite loop.
1451 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1452 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1453 if ((LHSKnown.Zero & DemandedBits) ==
1454 (~RHSC->getAPIntValue() & DemandedBits))
1455 return TLO.CombineTo(Op, Op0);
1456
1457 // If any of the set bits in the RHS are known zero on the LHS, shrink
1458 // the constant.
1459 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1460 DemandedElts, TLO))
1461 return true;
1462
1463 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1464 // constant, but if this 'and' is only clearing bits that were just set by
1465 // the xor, then this 'and' can be eliminated by shrinking the mask of
1466 // the xor. For example, for a 32-bit X:
1467 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1468 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1469 LHSKnown.One == ~RHSC->getAPIntValue()) {
1470 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1471 return TLO.CombineTo(Op, Xor);
1472 }
1473 }
1474
1475 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1476 SDValue X, Y;
1477 if (sd_match(Op,
1478 m_And(m_Value(Y),
1480 m_Sub(m_Value(X), m_Deferred(Y)))))) &&
1481 TLO.DAG.isKnownToBeAPowerOfTwo(Y, DemandedElts, /*OrZero=*/true)) {
1482 return TLO.CombineTo(
1483 Op, TLO.DAG.getNode(ISD::AND, dl, VT, TLO.DAG.getNOT(dl, X, VT), Y));
1484 }
1485
1486 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1487 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1488 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1489 (Op0.getOperand(0).isUndef() ||
1491 Op0->hasOneUse()) {
1492 unsigned NumSubElts =
1494 unsigned SubIdx = Op0.getConstantOperandVal(2);
1495 APInt DemandedSub =
1496 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1497 KnownBits KnownSubMask =
1498 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1499 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1500 SDValue NewAnd =
1501 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1502 SDValue NewInsert =
1503 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1504 Op0.getOperand(1), Op0.getOperand(2));
1505 return TLO.CombineTo(Op, NewInsert);
1506 }
1507 }
1508
1509 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1510 Depth + 1))
1511 return true;
1512 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1513 Known2, TLO, Depth + 1))
1514 return true;
1515
1516 // If all of the demanded bits are known one on one side, return the other.
1517 // These bits cannot contribute to the result of the 'and'.
1518 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1519 return TLO.CombineTo(Op, Op0);
1520 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1521 return TLO.CombineTo(Op, Op1);
1522 // If all of the demanded bits in the inputs are known zeros, return zero.
1523 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1524 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1525 // If the RHS is a constant, see if we can simplify it.
1526 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1527 TLO))
1528 return true;
1529 // If the operation can be done in a smaller type, do so.
1531 return true;
1532
1533 // Attempt to avoid multi-use ops if we don't need anything from them.
1534 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1536 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1538 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1539 if (DemandedOp0 || DemandedOp1) {
1540 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1541 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1542 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1543 return TLO.CombineTo(Op, NewOp);
1544 }
1545 }
1546
1547 Known &= Known2;
1548 break;
1549 }
1550 case ISD::OR: {
1551 SDValue Op0 = Op.getOperand(0);
1552 SDValue Op1 = Op.getOperand(1);
1553 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1554 Depth + 1)) {
1555 Op->dropFlags(SDNodeFlags::Disjoint);
1556 return true;
1557 }
1558
1559 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1560 Known2, TLO, Depth + 1)) {
1561 Op->dropFlags(SDNodeFlags::Disjoint);
1562 return true;
1563 }
1564
1565 // If all of the demanded bits are known zero on one side, return the other.
1566 // These bits cannot contribute to the result of the 'or'.
1567 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1568 return TLO.CombineTo(Op, Op0);
1569 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1570 return TLO.CombineTo(Op, Op1);
1571 // If the RHS is a constant, see if we can simplify it.
1572 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1573 return true;
1574 // If the operation can be done in a smaller type, do so.
1576 return true;
1577
1578 // Attempt to avoid multi-use ops if we don't need anything from them.
1579 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1581 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1583 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1584 if (DemandedOp0 || DemandedOp1) {
1585 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1586 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1587 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1588 return TLO.CombineTo(Op, NewOp);
1589 }
1590 }
1591
1592 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1593 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1594 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1595 Op0->hasOneUse() && Op1->hasOneUse()) {
1596 // Attempt to match all commutations - m_c_Or would've been useful!
1597 for (int I = 0; I != 2; ++I) {
1598 SDValue X = Op.getOperand(I).getOperand(0);
1599 SDValue C1 = Op.getOperand(I).getOperand(1);
1600 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1601 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1602 if (Alt.getOpcode() == ISD::OR) {
1603 for (int J = 0; J != 2; ++J) {
1604 if (X == Alt.getOperand(J)) {
1605 SDValue Y = Alt.getOperand(1 - J);
1606 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1607 {C1, C2})) {
1608 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1609 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1610 return TLO.CombineTo(
1611 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1612 }
1613 }
1614 }
1615 }
1616 }
1617 }
1618
1619 Known |= Known2;
1620 break;
1621 }
1622 case ISD::XOR: {
1623 SDValue Op0 = Op.getOperand(0);
1624 SDValue Op1 = Op.getOperand(1);
1625
1626 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1627 Depth + 1))
1628 return true;
1629 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1630 Depth + 1))
1631 return true;
1632
1633 // If all of the demanded bits are known zero on one side, return the other.
1634 // These bits cannot contribute to the result of the 'xor'.
1635 if (DemandedBits.isSubsetOf(Known.Zero))
1636 return TLO.CombineTo(Op, Op0);
1637 if (DemandedBits.isSubsetOf(Known2.Zero))
1638 return TLO.CombineTo(Op, Op1);
1639 // If the operation can be done in a smaller type, do so.
1641 return true;
1642
1643 // If all of the unknown bits are known to be zero on one side or the other
1644 // turn this into an *inclusive* or.
1645 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1646 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1647 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1648
1649 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1650 if (C) {
1651 // If one side is a constant, and all of the set bits in the constant are
1652 // also known set on the other side, turn this into an AND, as we know
1653 // the bits will be cleared.
1654 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1655 // NB: it is okay if more bits are known than are requested
1656 if (C->getAPIntValue() == Known2.One) {
1657 SDValue ANDC =
1658 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1659 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1660 }
1661
1662 // If the RHS is a constant, see if we can change it. Don't alter a -1
1663 // constant because that's a 'not' op, and that is better for combining
1664 // and codegen.
1665 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1666 // We're flipping all demanded bits. Flip the undemanded bits too.
1667 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1668 return TLO.CombineTo(Op, New);
1669 }
1670
1671 unsigned Op0Opcode = Op0.getOpcode();
1672 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1673 if (ConstantSDNode *ShiftC =
1674 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1675 // Don't crash on an oversized shift. We can not guarantee that a
1676 // bogus shift has been simplified to undef.
1677 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1678 uint64_t ShiftAmt = ShiftC->getZExtValue();
1680 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1681 : Ones.lshr(ShiftAmt);
1682 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1684 // If the xor constant is a demanded mask, do a 'not' before the
1685 // shift:
1686 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1687 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1688 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1689 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1690 Op0.getOperand(1)));
1691 }
1692 }
1693 }
1694 }
1695 }
1696
1697 // If we can't turn this into a 'not', try to shrink the constant.
1698 if (!C || !C->isAllOnes())
1699 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1700 return true;
1701
1702 // Attempt to avoid multi-use ops if we don't need anything from them.
1703 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1705 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1707 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1708 if (DemandedOp0 || DemandedOp1) {
1709 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1710 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1711 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1712 return TLO.CombineTo(Op, NewOp);
1713 }
1714 }
1715
1716 Known ^= Known2;
1717 break;
1718 }
1719 case ISD::SELECT:
1720 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1721 Known, TLO, Depth + 1))
1722 return true;
1723 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1724 Known2, TLO, Depth + 1))
1725 return true;
1726
1727 // If the operands are constants, see if we can simplify them.
1728 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1729 return true;
1730
1731 // Only known if known in both the LHS and RHS.
1732 Known = Known.intersectWith(Known2);
1733 break;
1734 case ISD::VSELECT:
1735 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1736 Known, TLO, Depth + 1))
1737 return true;
1738 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1739 Known2, TLO, Depth + 1))
1740 return true;
1741
1742 // Only known if known in both the LHS and RHS.
1743 Known = Known.intersectWith(Known2);
1744 break;
1745 case ISD::SELECT_CC:
1746 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1747 Known, TLO, Depth + 1))
1748 return true;
1749 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1750 Known2, TLO, Depth + 1))
1751 return true;
1752
1753 // If the operands are constants, see if we can simplify them.
1754 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1755 return true;
1756
1757 // Only known if known in both the LHS and RHS.
1758 Known = Known.intersectWith(Known2);
1759 break;
1760 case ISD::SETCC: {
1761 SDValue Op0 = Op.getOperand(0);
1762 SDValue Op1 = Op.getOperand(1);
1763 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1764 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1765 // (X is of integer type) then we only need the sign mask of the previous
1766 // result
1767 if (Op1.getValueType().isInteger() &&
1768 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(Op1)) ||
1769 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1770 isAllOnesOrAllOnesSplat(Op1)))) {
1771 KnownBits KnownOp0;
1774 DemandedElts, KnownOp0, TLO, Depth + 1))
1775 return true;
1776 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1777 // width as the setcc result, and (3) the result of a setcc conforms to 0
1778 // or -1, we may be able to bypass the setcc.
1779 if (DemandedBits.isSignMask() &&
1783 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1784 // NOT Operation
1785 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1786 SDLoc DL(Op);
1787 EVT VT = Op0.getValueType();
1788 SDValue NotOp0 = TLO.DAG.getNOT(DL, Op0, VT);
1789 return TLO.CombineTo(Op, NotOp0);
1790 }
1791 return TLO.CombineTo(Op, Op0);
1792 }
1793 }
1794 if (getBooleanContents(Op0.getValueType()) ==
1796 BitWidth > 1)
1797 Known.Zero.setBitsFrom(1);
1798 break;
1799 }
1800 case ISD::SHL: {
1801 SDValue Op0 = Op.getOperand(0);
1802 SDValue Op1 = Op.getOperand(1);
1803 EVT ShiftVT = Op1.getValueType();
1804
1805 if (std::optional<unsigned> KnownSA =
1806 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1807 unsigned ShAmt = *KnownSA;
1808 if (ShAmt == 0)
1809 return TLO.CombineTo(Op, Op0);
1810
1811 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1812 // single shift. We can do this if the bottom bits (which are shifted
1813 // out) are never demanded.
1814 // TODO - support non-uniform vector amounts.
1815 if (Op0.getOpcode() == ISD::SRL) {
1816 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1817 if (std::optional<unsigned> InnerSA =
1818 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1819 unsigned C1 = *InnerSA;
1820 unsigned Opc = ISD::SHL;
1821 int Diff = ShAmt - C1;
1822 if (Diff < 0) {
1823 Diff = -Diff;
1824 Opc = ISD::SRL;
1825 }
1826 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1827 return TLO.CombineTo(
1828 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1829 }
1830 }
1831 }
1832
1833 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1834 // are not demanded. This will likely allow the anyext to be folded away.
1835 // TODO - support non-uniform vector amounts.
1836 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1837 SDValue InnerOp = Op0.getOperand(0);
1838 EVT InnerVT = InnerOp.getValueType();
1839 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1840 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1841 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1842 SDValue NarrowShl = TLO.DAG.getNode(
1843 ISD::SHL, dl, InnerVT, InnerOp,
1844 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1845 return TLO.CombineTo(
1846 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1847 }
1848
1849 // Repeat the SHL optimization above in cases where an extension
1850 // intervenes: (shl (anyext (shr x, c1)), c2) to
1851 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1852 // aren't demanded (as above) and that the shifted upper c1 bits of
1853 // x aren't demanded.
1854 // TODO - support non-uniform vector amounts.
1855 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1856 InnerOp.hasOneUse()) {
1857 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1858 InnerOp, DemandedElts, Depth + 2)) {
1859 unsigned InnerShAmt = *SA2;
1860 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1861 DemandedBits.getActiveBits() <=
1862 (InnerBits - InnerShAmt + ShAmt) &&
1863 DemandedBits.countr_zero() >= ShAmt) {
1864 SDValue NewSA =
1865 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1866 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1867 InnerOp.getOperand(0));
1868 return TLO.CombineTo(
1869 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1870 }
1871 }
1872 }
1873 }
1874
1875 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1876 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1877 Depth + 1)) {
1878 // Disable the nsw and nuw flags. We can no longer guarantee that we
1879 // won't wrap after simplification.
1880 Op->dropFlags(SDNodeFlags::NoWrap);
1881 return true;
1882 }
1883 Known <<= ShAmt;
1884 // low bits known zero.
1885 Known.Zero.setLowBits(ShAmt);
1886
1887 // Attempt to avoid multi-use ops if we don't need anything from them.
1888 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1890 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1891 if (DemandedOp0) {
1892 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1893 return TLO.CombineTo(Op, NewOp);
1894 }
1895 }
1896
1897 // TODO: Can we merge this fold with the one below?
1898 // Try shrinking the operation as long as the shift amount will still be
1899 // in range.
1900 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1901 Op.getNode()->hasOneUse()) {
1902 // Search for the smallest integer type with free casts to and from
1903 // Op's type. For expedience, just check power-of-2 integer types.
1904 unsigned DemandedSize = DemandedBits.getActiveBits();
1905 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1906 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1907 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1908 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1909 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1910 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1911 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1912 assert(DemandedSize <= SmallVTBits &&
1913 "Narrowed below demanded bits?");
1914 // We found a type with free casts.
1915 SDValue NarrowShl = TLO.DAG.getNode(
1916 ISD::SHL, dl, SmallVT,
1917 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1918 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1919 return TLO.CombineTo(
1920 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1921 }
1922 }
1923 }
1924
1925 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1926 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1927 // Only do this if we demand the upper half so the knownbits are correct.
1928 unsigned HalfWidth = BitWidth / 2;
1929 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1930 DemandedBits.countLeadingOnes() >= HalfWidth) {
1931 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1932 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1933 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1934 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1935 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1936 // If we're demanding the upper bits at all, we must ensure
1937 // that the upper bits of the shift result are known to be zero,
1938 // which is equivalent to the narrow shift being NUW.
1939 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1940 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1941 SDNodeFlags Flags;
1942 Flags.setNoSignedWrap(IsNSW);
1943 Flags.setNoUnsignedWrap(IsNUW);
1944 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1945 SDValue NewShiftAmt =
1946 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1947 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1948 NewShiftAmt, Flags);
1949 SDValue NewExt =
1950 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1951 return TLO.CombineTo(Op, NewExt);
1952 }
1953 }
1954 }
1955 } else {
1956 // This is a variable shift, so we can't shift the demand mask by a known
1957 // amount. But if we are not demanding high bits, then we are not
1958 // demanding those bits from the pre-shifted operand either.
1959 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1960 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1961 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1962 Depth + 1)) {
1963 // Disable the nsw and nuw flags. We can no longer guarantee that we
1964 // won't wrap after simplification.
1965 Op->dropFlags(SDNodeFlags::NoWrap);
1966 return true;
1967 }
1968 Known.resetAll();
1969 }
1970 }
1971
1972 // If we are only demanding sign bits then we can use the shift source
1973 // directly.
1974 if (std::optional<unsigned> MaxSA =
1975 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1976 unsigned ShAmt = *MaxSA;
1977 unsigned NumSignBits =
1978 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1979 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1980 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1981 return TLO.CombineTo(Op, Op0);
1982 }
1983 break;
1984 }
1985 case ISD::SRL: {
1986 SDValue Op0 = Op.getOperand(0);
1987 SDValue Op1 = Op.getOperand(1);
1988 EVT ShiftVT = Op1.getValueType();
1989
1990 if (std::optional<unsigned> KnownSA =
1991 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1992 unsigned ShAmt = *KnownSA;
1993 if (ShAmt == 0)
1994 return TLO.CombineTo(Op, Op0);
1995
1996 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1997 // single shift. We can do this if the top bits (which are shifted out)
1998 // are never demanded.
1999 // TODO - support non-uniform vector amounts.
2000 if (Op0.getOpcode() == ISD::SHL) {
2001 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2002 if (std::optional<unsigned> InnerSA =
2003 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2004 unsigned C1 = *InnerSA;
2005 unsigned Opc = ISD::SRL;
2006 int Diff = ShAmt - C1;
2007 if (Diff < 0) {
2008 Diff = -Diff;
2009 Opc = ISD::SHL;
2010 }
2011 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
2012 return TLO.CombineTo(
2013 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
2014 }
2015 }
2016 }
2017
2018 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2019 // single sra. We can do this if the top bits are never demanded.
2020 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2021 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2022 if (std::optional<unsigned> InnerSA =
2023 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2024 unsigned C1 = *InnerSA;
2025 // Clamp the combined shift amount if it exceeds the bit width.
2026 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2027 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2028 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2029 Op0.getOperand(0), NewSA));
2030 }
2031 }
2032 }
2033
2034 APInt InDemandedMask = (DemandedBits << ShAmt);
2035
2036 // If the shift is exact, then it does demand the low bits (and knows that
2037 // they are zero).
2038 if (Op->getFlags().hasExact())
2039 InDemandedMask.setLowBits(ShAmt);
2040
2041 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2042 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2043 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2045 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2046 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2047 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2048 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2049 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2050 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2051 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2052 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2053 SDValue NewShiftAmt =
2054 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2055 SDValue NewShift =
2056 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2057 return TLO.CombineTo(
2058 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2059 }
2060 }
2061
2062 // Compute the new bits that are at the top now.
2063 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2064 Depth + 1))
2065 return true;
2066 Known >>= ShAmt;
2067 // High bits known zero.
2068 Known.Zero.setHighBits(ShAmt);
2069
2070 // Attempt to avoid multi-use ops if we don't need anything from them.
2071 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2073 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2074 if (DemandedOp0) {
2075 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2076 return TLO.CombineTo(Op, NewOp);
2077 }
2078 }
2079 } else {
2080 // Use generic knownbits computation as it has support for non-uniform
2081 // shift amounts.
2082 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2083 }
2084
2085 // If we are only demanding sign bits then we can use the shift source
2086 // directly.
2087 if (std::optional<unsigned> MaxSA =
2088 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2089 unsigned ShAmt = *MaxSA;
2090 // Must already be signbits in DemandedBits bounds, and can't demand any
2091 // shifted in zeroes.
2092 if (DemandedBits.countl_zero() >= ShAmt) {
2093 unsigned NumSignBits =
2094 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2095 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2096 return TLO.CombineTo(Op, Op0);
2097 }
2098 }
2099
2100 // Try to match AVG patterns (after shift simplification).
2101 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2102 DemandedElts, Depth + 1))
2103 return TLO.CombineTo(Op, AVG);
2104
2105 break;
2106 }
2107 case ISD::SRA: {
2108 SDValue Op0 = Op.getOperand(0);
2109 SDValue Op1 = Op.getOperand(1);
2110 EVT ShiftVT = Op1.getValueType();
2111
2112 // If we only want bits that already match the signbit then we don't need
2113 // to shift.
2114 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2115 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2116 NumHiDemandedBits)
2117 return TLO.CombineTo(Op, Op0);
2118
2119 // If this is an arithmetic shift right and only the low-bit is set, we can
2120 // always convert this into a logical shr, even if the shift amount is
2121 // variable. The low bit of the shift cannot be an input sign bit unless
2122 // the shift amount is >= the size of the datatype, which is undefined.
2123 if (DemandedBits.isOne())
2124 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2125
2126 if (std::optional<unsigned> KnownSA =
2127 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2128 unsigned ShAmt = *KnownSA;
2129 if (ShAmt == 0)
2130 return TLO.CombineTo(Op, Op0);
2131
2132 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2133 // supports sext_inreg.
2134 if (Op0.getOpcode() == ISD::SHL) {
2135 if (std::optional<unsigned> InnerSA =
2136 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2137 unsigned LowBits = BitWidth - ShAmt;
2138 EVT ExtVT = VT.changeElementType(
2139 *TLO.DAG.getContext(),
2140 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2141
2142 if (*InnerSA == ShAmt) {
2143 if (!TLO.LegalOperations() ||
2145 return TLO.CombineTo(
2146 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2147 Op0.getOperand(0),
2148 TLO.DAG.getValueType(ExtVT)));
2149
2150 // Even if we can't convert to sext_inreg, we might be able to
2151 // remove this shift pair if the input is already sign extended.
2152 unsigned NumSignBits =
2153 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2154 if (NumSignBits > ShAmt)
2155 return TLO.CombineTo(Op, Op0.getOperand(0));
2156 }
2157 }
2158 }
2159
2160 APInt InDemandedMask = (DemandedBits << ShAmt);
2161
2162 // If the shift is exact, then it does demand the low bits (and knows that
2163 // they are zero).
2164 if (Op->getFlags().hasExact())
2165 InDemandedMask.setLowBits(ShAmt);
2166
2167 // If any of the demanded bits are produced by the sign extension, we also
2168 // demand the input sign bit.
2169 if (DemandedBits.countl_zero() < ShAmt)
2170 InDemandedMask.setSignBit();
2171
2172 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2173 Depth + 1))
2174 return true;
2175 Known >>= ShAmt;
2176
2177 // If the input sign bit is known to be zero, or if none of the top bits
2178 // are demanded, turn this into an unsigned shift right.
2179 if (Known.Zero[BitWidth - ShAmt - 1] ||
2180 DemandedBits.countl_zero() >= ShAmt) {
2181 SDNodeFlags Flags;
2182 Flags.setExact(Op->getFlags().hasExact());
2183 return TLO.CombineTo(
2184 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2185 }
2186
2187 int Log2 = DemandedBits.exactLogBase2();
2188 if (Log2 >= 0) {
2189 // The bit must come from the sign.
2190 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2191 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2192 }
2193
2194 if (Known.One[BitWidth - ShAmt - 1])
2195 // New bits are known one.
2196 Known.One.setHighBits(ShAmt);
2197
2198 // Attempt to avoid multi-use ops if we don't need anything from them.
2199 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2201 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2202 if (DemandedOp0) {
2203 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2204 return TLO.CombineTo(Op, NewOp);
2205 }
2206 }
2207 }
2208
2209 // Try to match AVG patterns (after shift simplification).
2210 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2211 DemandedElts, Depth + 1))
2212 return TLO.CombineTo(Op, AVG);
2213
2214 break;
2215 }
2216 case ISD::FSHL:
2217 case ISD::FSHR: {
2218 SDValue Op0 = Op.getOperand(0);
2219 SDValue Op1 = Op.getOperand(1);
2220 SDValue Op2 = Op.getOperand(2);
2221 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2222
2223 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2224 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2225
2226 // For fshl, 0-shift returns the 1st arg.
2227 // For fshr, 0-shift returns the 2nd arg.
2228 if (Amt == 0) {
2229 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2230 Known, TLO, Depth + 1))
2231 return true;
2232 break;
2233 }
2234
2235 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2236 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2237 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2238 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2239 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2240 Depth + 1))
2241 return true;
2242 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2243 Depth + 1))
2244 return true;
2245
2246 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2247 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2248 Known = Known.unionWith(Known2);
2249
2250 // Attempt to avoid multi-use ops if we don't need anything from them.
2251 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2252 !DemandedElts.isAllOnes()) {
2254 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2256 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2257 if (DemandedOp0 || DemandedOp1) {
2258 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2259 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2260 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2261 DemandedOp1, Op2);
2262 return TLO.CombineTo(Op, NewOp);
2263 }
2264 }
2265 }
2266
2267 if (isPowerOf2_32(BitWidth)) {
2268 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2269 // iff we're guaranteed not to use Op0.
2270 // TODO: Add FSHL equivalent?
2271 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2272 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
2273 KnownBits KnownAmt =
2274 TLO.DAG.computeKnownBits(Op2, DemandedElts, Depth + 1);
2275 unsigned MaxShiftAmt =
2276 KnownAmt.getMaxValue().getLimitedValue(BitWidth - 1);
2277 // Check we don't demand any shifted bits outside Op1.
2278 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2279 EVT AmtVT = Op2.getValueType();
2280 SDValue NewAmt =
2281 TLO.DAG.getNode(ISD::AND, dl, AmtVT, Op2,
2282 TLO.DAG.getConstant(BitWidth - 1, dl, AmtVT));
2283 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, Op1, NewAmt);
2284 return TLO.CombineTo(Op, NewOp);
2285 }
2286 }
2287
2288 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2289 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2290 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
2291 Depth + 1))
2292 return true;
2293 }
2294 break;
2295 }
2296 case ISD::ROTL:
2297 case ISD::ROTR: {
2298 SDValue Op0 = Op.getOperand(0);
2299 SDValue Op1 = Op.getOperand(1);
2300 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2301
2302 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2303 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2304 return TLO.CombineTo(Op, Op0);
2305
2306 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2307 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2308 unsigned RevAmt = BitWidth - Amt;
2309
2310 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2311 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2312 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2313 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2314 Depth + 1))
2315 return true;
2316
2317 // rot*(x, 0) --> x
2318 if (Amt == 0)
2319 return TLO.CombineTo(Op, Op0);
2320
2321 // See if we don't demand either half of the rotated bits.
2322 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2323 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2324 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2325 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2326 }
2327 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2328 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2329 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2330 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2331 }
2332 }
2333
2334 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2335 if (isPowerOf2_32(BitWidth)) {
2336 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2337 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2338 Depth + 1))
2339 return true;
2340 }
2341 break;
2342 }
2343 case ISD::SMIN:
2344 case ISD::SMAX:
2345 case ISD::UMIN:
2346 case ISD::UMAX: {
2347 unsigned Opc = Op.getOpcode();
2348 SDValue Op0 = Op.getOperand(0);
2349 SDValue Op1 = Op.getOperand(1);
2350
2351 // If we're only demanding signbits, then we can simplify to OR/AND node.
2352 unsigned BitOp =
2353 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2354 unsigned NumSignBits =
2355 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2356 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2357 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2358 if (NumSignBits >= NumDemandedUpperBits)
2359 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2360
2361 // Check if one arg is always less/greater than (or equal) to the other arg.
2362 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2363 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2364 switch (Opc) {
2365 case ISD::SMIN:
2366 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2367 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2368 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2369 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2370 Known = KnownBits::smin(Known0, Known1);
2371 break;
2372 case ISD::SMAX:
2373 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2374 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2375 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2376 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2377 Known = KnownBits::smax(Known0, Known1);
2378 break;
2379 case ISD::UMIN:
2380 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2381 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2382 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2383 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2384 Known = KnownBits::umin(Known0, Known1);
2385 break;
2386 case ISD::UMAX:
2387 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2388 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2389 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2390 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2391 Known = KnownBits::umax(Known0, Known1);
2392 break;
2393 }
2394 break;
2395 }
2396 case ISD::BITREVERSE: {
2397 SDValue Src = Op.getOperand(0);
2398 APInt DemandedSrcBits = DemandedBits.reverseBits();
2399 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2400 Depth + 1))
2401 return true;
2402 Known = Known2.reverseBits();
2403 break;
2404 }
2405 case ISD::BSWAP: {
2406 SDValue Src = Op.getOperand(0);
2407
2408 // If the only bits demanded come from one byte of the bswap result,
2409 // just shift the input byte into position to eliminate the bswap.
2410 unsigned NLZ = DemandedBits.countl_zero();
2411 unsigned NTZ = DemandedBits.countr_zero();
2412
2413 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2414 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2415 // have 14 leading zeros, round to 8.
2416 NLZ = alignDown(NLZ, 8);
2417 NTZ = alignDown(NTZ, 8);
2418 // If we need exactly one byte, we can do this transformation.
2419 if (BitWidth - NLZ - NTZ == 8) {
2420 // Replace this with either a left or right shift to get the byte into
2421 // the right place.
2422 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2423 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2424 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2425 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2426 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2427 return TLO.CombineTo(Op, NewOp);
2428 }
2429 }
2430
2431 APInt DemandedSrcBits = DemandedBits.byteSwap();
2432 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2433 Depth + 1))
2434 return true;
2435 Known = Known2.byteSwap();
2436 break;
2437 }
2438 case ISD::CTPOP: {
2439 // If only 1 bit is demanded, replace with PARITY as long as we're before
2440 // op legalization.
2441 // FIXME: Limit to scalars for now.
2442 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2443 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2444 Op.getOperand(0)));
2445
2446 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2447 break;
2448 }
2450 SDValue Op0 = Op.getOperand(0);
2451 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2452 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2453
2454 // If we only care about the highest bit, don't bother shifting right.
2455 if (DemandedBits.isSignMask()) {
2456 unsigned MinSignedBits =
2457 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2458 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2459 // However if the input is already sign extended we expect the sign
2460 // extension to be dropped altogether later and do not simplify.
2461 if (!AlreadySignExtended) {
2462 // Compute the correct shift amount type, which must be getShiftAmountTy
2463 // for scalar types after legalization.
2464 SDValue ShiftAmt =
2465 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2466 return TLO.CombineTo(Op,
2467 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2468 }
2469 }
2470
2471 // If none of the extended bits are demanded, eliminate the sextinreg.
2472 if (DemandedBits.getActiveBits() <= ExVTBits)
2473 return TLO.CombineTo(Op, Op0);
2474
2475 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2476
2477 // Since the sign extended bits are demanded, we know that the sign
2478 // bit is demanded.
2479 InputDemandedBits.setBit(ExVTBits - 1);
2480
2481 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2482 Depth + 1))
2483 return true;
2484
2485 // If the sign bit of the input is known set or clear, then we know the
2486 // top bits of the result.
2487
2488 // If the input sign bit is known zero, convert this into a zero extension.
2489 if (Known.Zero[ExVTBits - 1])
2490 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2491
2492 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2493 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2494 Known.One.setBitsFrom(ExVTBits);
2495 Known.Zero &= Mask;
2496 } else { // Input sign bit unknown
2497 Known.Zero &= Mask;
2498 Known.One &= Mask;
2499 }
2500 break;
2501 }
2502 case ISD::BUILD_PAIR: {
2503 EVT HalfVT = Op.getOperand(0).getValueType();
2504 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2505
2506 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2507 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2508
2509 KnownBits KnownLo, KnownHi;
2510
2511 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2512 return true;
2513
2514 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2515 return true;
2516
2517 Known = KnownHi.concat(KnownLo);
2518 break;
2519 }
2521 if (VT.isScalableVector())
2522 return false;
2523 [[fallthrough]];
2524 case ISD::ZERO_EXTEND: {
2525 SDValue Src = Op.getOperand(0);
2526 EVT SrcVT = Src.getValueType();
2527 unsigned InBits = SrcVT.getScalarSizeInBits();
2528 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2529 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2530
2531 // If none of the top bits are demanded, convert this into an any_extend.
2532 if (DemandedBits.getActiveBits() <= InBits) {
2533 // If we only need the non-extended bits of the bottom element
2534 // then we can just bitcast to the result.
2535 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2536 VT.getSizeInBits() == SrcVT.getSizeInBits())
2537 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2538
2539 unsigned Opc =
2541 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2542 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2543 }
2544
2545 APInt InDemandedBits = DemandedBits.trunc(InBits);
2546 APInt InDemandedElts = DemandedElts.zext(InElts);
2547 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2548 Depth + 1)) {
2549 Op->dropFlags(SDNodeFlags::NonNeg);
2550 return true;
2551 }
2552 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2553 Known = Known.zext(BitWidth);
2554
2555 // Attempt to avoid multi-use ops if we don't need anything from them.
2557 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2558 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2559 break;
2560 }
2562 if (VT.isScalableVector())
2563 return false;
2564 [[fallthrough]];
2565 case ISD::SIGN_EXTEND: {
2566 SDValue Src = Op.getOperand(0);
2567 EVT SrcVT = Src.getValueType();
2568 unsigned InBits = SrcVT.getScalarSizeInBits();
2569 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2570 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2571
2572 APInt InDemandedElts = DemandedElts.zext(InElts);
2573 APInt InDemandedBits = DemandedBits.trunc(InBits);
2574
2575 // Since some of the sign extended bits are demanded, we know that the sign
2576 // bit is demanded.
2577 InDemandedBits.setBit(InBits - 1);
2578
2579 // If none of the top bits are demanded, convert this into an any_extend.
2580 if (DemandedBits.getActiveBits() <= InBits) {
2581 // If we only need the non-extended bits of the bottom element
2582 // then we can just bitcast to the result.
2583 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2584 VT.getSizeInBits() == SrcVT.getSizeInBits())
2585 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2586
2587 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2589 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2590 InBits) {
2591 unsigned Opc =
2593 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2594 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2595 }
2596 }
2597
2598 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2599 Depth + 1))
2600 return true;
2601 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2602
2603 // If the sign bit is known one, the top bits match.
2604 Known = Known.sext(BitWidth);
2605
2606 // If the sign bit is known zero, convert this to a zero extend.
2607 if (Known.isNonNegative()) {
2608 unsigned Opc =
2610 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2611 SDNodeFlags Flags;
2612 if (!IsVecInReg)
2613 Flags |= SDNodeFlags::NonNeg;
2614 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2615 }
2616 }
2617
2618 // Attempt to avoid multi-use ops if we don't need anything from them.
2620 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2621 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2622 break;
2623 }
2625 if (VT.isScalableVector())
2626 return false;
2627 [[fallthrough]];
2628 case ISD::ANY_EXTEND: {
2629 SDValue Src = Op.getOperand(0);
2630 EVT SrcVT = Src.getValueType();
2631 unsigned InBits = SrcVT.getScalarSizeInBits();
2632 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2633 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2634
2635 // If we only need the bottom element then we can just bitcast.
2636 // TODO: Handle ANY_EXTEND?
2637 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2638 VT.getSizeInBits() == SrcVT.getSizeInBits())
2639 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2640
2641 APInt InDemandedBits = DemandedBits.trunc(InBits);
2642 APInt InDemandedElts = DemandedElts.zext(InElts);
2643 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2644 Depth + 1))
2645 return true;
2646 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2647 Known = Known.anyext(BitWidth);
2648
2649 // Attempt to avoid multi-use ops if we don't need anything from them.
2651 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2652 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2653 break;
2654 }
2655 case ISD::TRUNCATE: {
2656 SDValue Src = Op.getOperand(0);
2657
2658 // Simplify the input, using demanded bit information, and compute the known
2659 // zero/one bits live out.
2660 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2661 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2662 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2663 Depth + 1)) {
2664 // Disable the nsw and nuw flags. We can no longer guarantee that we
2665 // won't wrap after simplification.
2666 Op->dropFlags(SDNodeFlags::NoWrap);
2667 return true;
2668 }
2669 Known = Known.trunc(BitWidth);
2670
2671 // Attempt to avoid multi-use ops if we don't need anything from them.
2673 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2674 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2675
2676 // If the input is only used by this truncate, see if we can shrink it based
2677 // on the known demanded bits.
2678 switch (Src.getOpcode()) {
2679 default:
2680 break;
2681 case ISD::SRL:
2682 // Shrink SRL by a constant if none of the high bits shifted in are
2683 // demanded.
2684 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2685 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2686 // undesirable.
2687 break;
2688
2689 if (Src.getNode()->hasOneUse()) {
2690 if (isTruncateFree(Src, VT) &&
2691 !isTruncateFree(Src.getValueType(), VT)) {
2692 // If truncate is only free at trunc(srl), do not turn it into
2693 // srl(trunc). The check is done by first check the truncate is free
2694 // at Src's opcode(srl), then check the truncate is not done by
2695 // referencing sub-register. In test, if both trunc(srl) and
2696 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2697 // trunc(srl)'s trunc is free, trunc(srl) is better.
2698 break;
2699 }
2700
2701 std::optional<unsigned> ShAmtC =
2702 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2703 if (!ShAmtC || *ShAmtC >= BitWidth)
2704 break;
2705 unsigned ShVal = *ShAmtC;
2706
2707 APInt HighBits =
2708 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2709 HighBits.lshrInPlace(ShVal);
2710 HighBits = HighBits.trunc(BitWidth);
2711 if (!(HighBits & DemandedBits)) {
2712 // None of the shifted in bits are needed. Add a truncate of the
2713 // shift input, then shift it.
2714 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2715 SDValue NewTrunc =
2716 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2717 return TLO.CombineTo(
2718 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2719 }
2720 }
2721 break;
2722 }
2723
2724 break;
2725 }
2726 case ISD::AssertZext: {
2727 // AssertZext demands all of the high bits, plus any of the low bits
2728 // demanded by its users.
2729 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2731 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2732 TLO, Depth + 1))
2733 return true;
2734
2735 Known.Zero |= ~InMask;
2736 Known.One &= (~Known.Zero);
2737 break;
2738 }
2740 SDValue Src = Op.getOperand(0);
2741 SDValue Idx = Op.getOperand(1);
2742 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2743 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2744
2745 if (SrcEltCnt.isScalable())
2746 return false;
2747
2748 // Demand the bits from every vector element without a constant index.
2749 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2750 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2751 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2752 if (CIdx->getAPIntValue().ult(NumSrcElts))
2753 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2754
2755 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2756 // anything about the extended bits.
2757 APInt DemandedSrcBits = DemandedBits;
2758 if (BitWidth > EltBitWidth)
2759 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2760
2761 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2762 Depth + 1))
2763 return true;
2764
2765 // Attempt to avoid multi-use ops if we don't need anything from them.
2766 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2767 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2768 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2769 SDValue NewOp =
2770 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2771 return TLO.CombineTo(Op, NewOp);
2772 }
2773 }
2774
2775 Known = Known2;
2776 if (BitWidth > EltBitWidth)
2777 Known = Known.anyext(BitWidth);
2778 break;
2779 }
2780 case ISD::BITCAST: {
2781 if (VT.isScalableVector())
2782 return false;
2783 SDValue Src = Op.getOperand(0);
2784 EVT SrcVT = Src.getValueType();
2785 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2786
2787 // If this is an FP->Int bitcast and if the sign bit is the only
2788 // thing demanded, turn this into a FGETSIGN.
2789 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2790 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2791 SrcVT.isFloatingPoint()) {
2792 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2793 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2794 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2795 SrcVT != MVT::f128) {
2796 // Cannot eliminate/lower SHL for f128 yet.
2797 EVT Ty = OpVTLegal ? VT : MVT::i32;
2798 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2799 // place. We expect the SHL to be eliminated by other optimizations.
2800 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2801 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2802 if (!OpVTLegal && OpVTSizeInBits > 32)
2803 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2804 unsigned ShVal = Op.getValueSizeInBits() - 1;
2805 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2806 return TLO.CombineTo(Op,
2807 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2808 }
2809 }
2810
2811 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2812 // Demand the elt/bit if any of the original elts/bits are demanded.
2813 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2814 unsigned Scale = BitWidth / NumSrcEltBits;
2815 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2816 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2817 for (unsigned i = 0; i != Scale; ++i) {
2818 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2819 unsigned BitOffset = EltOffset * NumSrcEltBits;
2820 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2821 }
2822 // Recursive calls below may turn not demanded elements into poison, so we
2823 // need to demand all smaller source elements that maps to a demanded
2824 // destination element.
2825 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2826
2827 APInt KnownSrcUndef, KnownSrcZero;
2828 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2829 KnownSrcZero, TLO, Depth + 1))
2830 return true;
2831
2832 KnownBits KnownSrcBits;
2833 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2834 KnownSrcBits, TLO, Depth + 1))
2835 return true;
2836 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2837 // TODO - bigendian once we have test coverage.
2838 unsigned Scale = NumSrcEltBits / BitWidth;
2839 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2840 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2841 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2842 for (unsigned i = 0; i != NumElts; ++i)
2843 if (DemandedElts[i]) {
2844 unsigned Offset = (i % Scale) * BitWidth;
2845 DemandedSrcBits.insertBits(DemandedBits, Offset);
2846 DemandedSrcElts.setBit(i / Scale);
2847 }
2848
2849 if (SrcVT.isVector()) {
2850 APInt KnownSrcUndef, KnownSrcZero;
2851 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2852 KnownSrcZero, TLO, Depth + 1))
2853 return true;
2854 }
2855
2856 KnownBits KnownSrcBits;
2857 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2858 KnownSrcBits, TLO, Depth + 1))
2859 return true;
2860
2861 // Attempt to avoid multi-use ops if we don't need anything from them.
2862 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2863 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2864 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2865 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2866 return TLO.CombineTo(Op, NewOp);
2867 }
2868 }
2869 }
2870
2871 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2872 // recursive call where Known may be useful to the caller.
2873 if (Depth > 0) {
2874 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2875 return false;
2876 }
2877 break;
2878 }
2879 case ISD::MUL:
2880 if (DemandedBits.isPowerOf2()) {
2881 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2882 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2883 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2884 unsigned CTZ = DemandedBits.countr_zero();
2885 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2886 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2887 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2888 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2889 return TLO.CombineTo(Op, Shl);
2890 }
2891 }
2892 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2893 // X * X is odd iff X is odd.
2894 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2895 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2896 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2897 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2898 return TLO.CombineTo(Op, And1);
2899 }
2900 [[fallthrough]];
2901 case ISD::PTRADD:
2902 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2903 break;
2904 // PTRADD behaves like ADD if pointers are represented as integers.
2905 [[fallthrough]];
2906 case ISD::ADD:
2907 case ISD::SUB: {
2908 // Add, Sub, and Mul don't demand any bits in positions beyond that
2909 // of the highest bit demanded of them.
2910 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2911 SDNodeFlags Flags = Op.getNode()->getFlags();
2912 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2913 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2914 KnownBits KnownOp0, KnownOp1;
2915 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2916 const KnownBits &KnownRHS) {
2917 if (Op.getOpcode() == ISD::MUL)
2918 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2919 return Demanded;
2920 };
2921 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2922 Depth + 1) ||
2923 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2924 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2925 // See if the operation should be performed at a smaller bit width.
2927 // Disable the nsw and nuw flags. We can no longer guarantee that we
2928 // won't wrap after simplification.
2929 Op->dropFlags(SDNodeFlags::NoWrap);
2930 return true;
2931 }
2932
2933 // neg x with only low bit demanded is simply x.
2934 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2935 isNullConstant(Op0))
2936 return TLO.CombineTo(Op, Op1);
2937
2938 // Attempt to avoid multi-use ops if we don't need anything from them.
2939 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2941 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2943 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2944 if (DemandedOp0 || DemandedOp1) {
2945 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2946 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2947 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2948 Flags & ~SDNodeFlags::NoWrap);
2949 return TLO.CombineTo(Op, NewOp);
2950 }
2951 }
2952
2953 // If we have a constant operand, we may be able to turn it into -1 if we
2954 // do not demand the high bits. This can make the constant smaller to
2955 // encode, allow more general folding, or match specialized instruction
2956 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2957 // is probably not useful (and could be detrimental).
2959 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2960 if (C && !C->isAllOnes() && !C->isOne() &&
2961 (C->getAPIntValue() | HighMask).isAllOnes()) {
2962 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2963 // Disable the nsw and nuw flags. We can no longer guarantee that we
2964 // won't wrap after simplification.
2965 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2966 Flags & ~SDNodeFlags::NoWrap);
2967 return TLO.CombineTo(Op, NewOp);
2968 }
2969
2970 // Match a multiply with a disguised negated-power-of-2 and convert to a
2971 // an equivalent shift-left amount.
2972 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2973 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2974 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2975 return 0;
2976
2977 // Don't touch opaque constants. Also, ignore zero and power-of-2
2978 // multiplies. Those will get folded later.
2979 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2980 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2981 !MulC->getAPIntValue().isPowerOf2()) {
2982 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2983 if (UnmaskedC.isNegatedPowerOf2())
2984 return (-UnmaskedC).logBase2();
2985 }
2986 return 0;
2987 };
2988
2989 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2990 unsigned ShlAmt) {
2991 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2992 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2993 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2994 return TLO.CombineTo(Op, Res);
2995 };
2996
2998 if (Op.getOpcode() == ISD::ADD) {
2999 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
3000 if (unsigned ShAmt = getShiftLeftAmt(Op0))
3001 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
3002 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
3003 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3004 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
3005 }
3006 if (Op.getOpcode() == ISD::SUB) {
3007 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3008 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3009 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
3010 }
3011 }
3012
3013 if (Op.getOpcode() == ISD::MUL) {
3014 Known = KnownBits::mul(KnownOp0, KnownOp1);
3015 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3017 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
3018 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
3019 }
3020 break;
3021 }
3022 case ISD::FABS: {
3023 SDValue Op0 = Op.getOperand(0);
3024 APInt SignMask = APInt::getSignMask(BitWidth);
3025
3026 if (!DemandedBits.intersects(SignMask))
3027 return TLO.CombineTo(Op, Op0);
3028
3029 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3030 Depth + 1))
3031 return true;
3032
3033 if (Known.isNonNegative())
3034 return TLO.CombineTo(Op, Op0);
3035 if (Known.isNegative())
3036 return TLO.CombineTo(
3037 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3038
3039 Known.Zero |= SignMask;
3040 Known.One &= ~SignMask;
3041
3042 break;
3043 }
3044 case ISD::FCOPYSIGN: {
3045 SDValue Op0 = Op.getOperand(0);
3046 SDValue Op1 = Op.getOperand(1);
3047
3048 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3049 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3050 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3051 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3052
3053 if (!DemandedBits.intersects(SignMask0))
3054 return TLO.CombineTo(Op, Op0);
3055
3056 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3057 Known, TLO, Depth + 1) ||
3058 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3059 Depth + 1))
3060 return true;
3061
3062 if (Known2.isNonNegative())
3063 return TLO.CombineTo(
3064 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3065
3066 if (Known2.isNegative())
3067 return TLO.CombineTo(
3068 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3069 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3070
3071 Known.Zero &= ~SignMask0;
3072 Known.One &= ~SignMask0;
3073 break;
3074 }
3075 case ISD::FNEG: {
3076 SDValue Op0 = Op.getOperand(0);
3077 APInt SignMask = APInt::getSignMask(BitWidth);
3078
3079 if (!DemandedBits.intersects(SignMask))
3080 return TLO.CombineTo(Op, Op0);
3081
3082 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3083 Depth + 1))
3084 return true;
3085
3086 if (!Known.isSignUnknown()) {
3087 Known.Zero ^= SignMask;
3088 Known.One ^= SignMask;
3089 }
3090
3091 break;
3092 }
3093 default:
3094 // We also ask the target about intrinsics (which could be specific to it).
3095 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3096 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3097 // TODO: Probably okay to remove after audit; here to reduce change size
3098 // in initial enablement patch for scalable vectors
3099 if (Op.getValueType().isScalableVector())
3100 break;
3102 Known, TLO, Depth))
3103 return true;
3104 break;
3105 }
3106
3107 // Just use computeKnownBits to compute output bits.
3108 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3109 break;
3110 }
3111
3112 // If we know the value of all of the demanded bits, return this as a
3113 // constant.
3115 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3116 // Avoid folding to a constant if any OpaqueConstant is involved.
3117 if (llvm::any_of(Op->ops(), [](SDValue V) {
3118 auto *C = dyn_cast<ConstantSDNode>(V);
3119 return C && C->isOpaque();
3120 }))
3121 return false;
3122 if (VT.isInteger())
3123 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3124 if (VT.isFloatingPoint())
3125 return TLO.CombineTo(
3126 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3127 dl, VT));
3128 }
3129
3130 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3131 // Try again just for the original demanded elts.
3132 // Ensure we do this AFTER constant folding above.
3133 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3134 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3135
3136 return false;
3137}
3138
// DAGCombiner-facing wrapper: simplify the demanded vector elements of Op and
// commit any resulting DAG replacement through DCI. Returns true if Op was
// simplified.
// NOTE(review): the opening line of this signature (presumably
// `bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,`) appears to
// have been lost in extraction — confirm against the upstream source.
3140 const APInt &DemandedElts,
3141 DAGCombinerInfo &DCI) const {
3142 SelectionDAG &DAG = DCI.DAG;
// Legality constraints for the rewrite are derived from the current combine
// phase: once (op) legalization has run, new illegal types/ops must not be
// introduced.
3143 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3144 !DCI.isBeforeLegalizeOps());
3145
// KnownUndef/KnownZero are computed by the worker overload but unused here.
3146 APInt KnownUndef, KnownZero;
3147 bool Simplified =
3148 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
3149 if (Simplified) {
// Revisit Op on the combiner worklist and apply the replacement recorded in
// TLO to the DAG.
3150 DCI.AddToWorklist(Op.getNode());
3151 DCI.CommitTargetLoweringOpt(TLO);
3152 }
3153
3154 return Simplified;
3155 }
3156
3157/// Given a vector binary operation and known undefined elements for each input
3158/// operand, compute whether each element of the output is undefined.
/// \param UndefOp0/UndefOp1  Per-element undef masks for the two operands;
///                           bit i set means element i of that operand is
///                           known undef.
/// \returns a per-element mask of output lanes known to be undef.
// NOTE(review): the signature line (presumably
// `static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,`)
// and the first line of the "Vector binop only" assert appear to have been
// lost in extraction — confirm against the upstream source.
3160 const APInt &UndefOp0,
3161 const APInt &UndefOp1) {
3162 EVT VT = BO.getValueType();
3164 "Vector binop only");
3165
3166 EVT EltVT = VT.getVectorElementType();
// Scalable vectors have no fixed element count; fall back to a single lane.
3167 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3168 assert(UndefOp0.getBitWidth() == NumElts &&
3169 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3170
// Return element Index of V if it is known undef or is a constant-foldable
// scalar of the element type; otherwise return an empty SDValue so the caller
// skips this lane.
3171 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3172 const APInt &UndefVals) {
3173 if (UndefVals[Index])
3174 return DAG.getUNDEF(EltVT);
3175
3176 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3177 // Try hard to make sure that the getNode() call is not creating temporary
3178 // nodes. Ignore opaque integers because they do not constant fold.
3179 SDValue Elt = BV->getOperand(Index);
3180 auto *C = dyn_cast<ConstantSDNode>(Elt);
3181 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3182 return Elt;
3183 }
3184
3185 return SDValue();
3186 };
3187
3188 APInt KnownUndef = APInt::getZero(NumElts);
3189 for (unsigned i = 0; i != NumElts; ++i) {
3190 // If both inputs for this element are either constant or undef and match
3191 // the element type, compute the constant/undef result for this element of
3192 // the vector.
3193 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3194 // not handle FP constants. The code within getNode() should be refactored
3195 // to avoid the danger of creating a bogus temporary node here.
3196 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3197 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3198 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
// getNode() constant-folds scalar binops; if the folded result is undef,
// the corresponding output lane is known undef.
3199 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3200 KnownUndef.setBit(i);
3201 }
3202 return KnownUndef;
3203 }
3204
3206 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3207 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3208 bool AssumeSingleUse) const {
3209 EVT VT = Op.getValueType();
3210 unsigned Opcode = Op.getOpcode();
3211 APInt DemandedElts = OriginalDemandedElts;
3212 unsigned NumElts = DemandedElts.getBitWidth();
3213 assert(VT.isVector() && "Expected vector op");
3214
3215 KnownUndef = KnownZero = APInt::getZero(NumElts);
3216
3218 return false;
3219
3220 // TODO: For now we assume we know nothing about scalable vectors.
3221 if (VT.isScalableVector())
3222 return false;
3223
3224 assert(VT.getVectorNumElements() == NumElts &&
3225 "Mask size mismatches value type element count!");
3226
3227 // Undef operand.
3228 if (Op.isUndef()) {
3229 KnownUndef.setAllBits();
3230 return false;
3231 }
3232
3233 // If Op has other users, assume that all elements are needed.
3234 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3235 DemandedElts.setAllBits();
3236
3237 // Not demanding any elements from Op.
3238 if (DemandedElts == 0) {
3239 KnownUndef.setAllBits();
3240 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3241 }
3242
3243 // Limit search depth.
3245 return false;
3246
3247 SDLoc DL(Op);
3248 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3249 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3250
3251 // Helper for demanding the specified elements and all the bits of both binary
3252 // operands.
3253 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3254 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3255 TLO.DAG, Depth + 1);
3256 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3257 TLO.DAG, Depth + 1);
3258 if (NewOp0 || NewOp1) {
3259 SDValue NewOp =
3260 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3261 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3262 return TLO.CombineTo(Op, NewOp);
3263 }
3264 return false;
3265 };
3266
3267 switch (Opcode) {
3268 case ISD::SCALAR_TO_VECTOR: {
3269 if (!DemandedElts[0]) {
3270 KnownUndef.setAllBits();
3271 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3272 }
3273 KnownUndef.setHighBits(NumElts - 1);
3274 break;
3275 }
3276 case ISD::BITCAST: {
3277 SDValue Src = Op.getOperand(0);
3278 EVT SrcVT = Src.getValueType();
3279
3280 if (!SrcVT.isVector()) {
3281 // TODO - bigendian once we have test coverage.
3282 if (IsLE) {
3283 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3284 unsigned EltSize = VT.getScalarSizeInBits();
3285 for (unsigned I = 0; I != NumElts; ++I) {
3286 if (DemandedElts[I]) {
3287 unsigned Offset = I * EltSize;
3288 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3289 }
3290 }
3291 KnownBits Known;
3292 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3293 return true;
3294 }
3295 break;
3296 }
3297
3298 // Fast handling of 'identity' bitcasts.
3299 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3300 if (NumSrcElts == NumElts)
3301 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3302 KnownZero, TLO, Depth + 1);
3303
3304 APInt SrcDemandedElts, SrcZero, SrcUndef;
3305
3306 // Bitcast from 'large element' src vector to 'small element' vector, we
3307 // must demand a source element if any DemandedElt maps to it.
3308 if ((NumElts % NumSrcElts) == 0) {
3309 unsigned Scale = NumElts / NumSrcElts;
3310 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3311 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3312 TLO, Depth + 1))
3313 return true;
3314
3315 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3316 // of the large element.
3317 // TODO - bigendian once we have test coverage.
3318 if (IsLE) {
3319 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3320 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3321 for (unsigned i = 0; i != NumElts; ++i)
3322 if (DemandedElts[i]) {
3323 unsigned Ofs = (i % Scale) * EltSizeInBits;
3324 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3325 }
3326
3327 KnownBits Known;
3328 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3329 TLO, Depth + 1))
3330 return true;
3331
3332 // The bitcast has split each wide element into a number of
3333 // narrow subelements. We have just computed the Known bits
3334 // for wide elements. See if element splitting results in
3335 // some subelements being zero. Only for demanded elements!
3336 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3337 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3338 .isAllOnes())
3339 continue;
3340 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3341 unsigned Elt = Scale * SrcElt + SubElt;
3342 if (DemandedElts[Elt])
3343 KnownZero.setBit(Elt);
3344 }
3345 }
3346 }
3347
3348 // If the src element is zero/undef then all the output elements will be -
3349 // only demanded elements are guaranteed to be correct.
3350 for (unsigned i = 0; i != NumSrcElts; ++i) {
3351 if (SrcDemandedElts[i]) {
3352 if (SrcZero[i])
3353 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3354 if (SrcUndef[i])
3355 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3356 }
3357 }
3358 }
3359
3360 // Bitcast from 'small element' src vector to 'large element' vector, we
3361 // demand all smaller source elements covered by the larger demanded element
3362 // of this vector.
3363 if ((NumSrcElts % NumElts) == 0) {
3364 unsigned Scale = NumSrcElts / NumElts;
3365 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3366 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3367 TLO, Depth + 1))
3368 return true;
3369
3370 // If all the src elements covering an output element are zero/undef, then
3371 // the output element will be as well, assuming it was demanded.
3372 for (unsigned i = 0; i != NumElts; ++i) {
3373 if (DemandedElts[i]) {
3374 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3375 KnownZero.setBit(i);
3376 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3377 KnownUndef.setBit(i);
3378 }
3379 }
3380 }
3381 break;
3382 }
3383 case ISD::FREEZE: {
3384 SDValue N0 = Op.getOperand(0);
3385 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3386 /*PoisonOnly=*/false,
3387 Depth + 1))
3388 return TLO.CombineTo(Op, N0);
3389
3390 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3391 // freeze(op(x, ...)) -> op(freeze(x), ...).
3392 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3393 return TLO.CombineTo(
3395 TLO.DAG.getFreeze(N0.getOperand(0))));
3396 break;
3397 }
3398 case ISD::BUILD_VECTOR: {
3399 // Check all elements and simplify any unused elements with UNDEF.
3400 if (!DemandedElts.isAllOnes()) {
3401 // Don't simplify BROADCASTS.
3402 if (llvm::any_of(Op->op_values(),
3403 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3405 bool Updated = false;
3406 for (unsigned i = 0; i != NumElts; ++i) {
3407 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3408 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3409 KnownUndef.setBit(i);
3410 Updated = true;
3411 }
3412 }
3413 if (Updated)
3414 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3415 }
3416 }
3417 for (unsigned i = 0; i != NumElts; ++i) {
3418 SDValue SrcOp = Op.getOperand(i);
3419 if (SrcOp.isUndef()) {
3420 KnownUndef.setBit(i);
3421 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3423 KnownZero.setBit(i);
3424 }
3425 }
3426 break;
3427 }
3428 case ISD::CONCAT_VECTORS: {
3429 EVT SubVT = Op.getOperand(0).getValueType();
3430 unsigned NumSubVecs = Op.getNumOperands();
3431 unsigned NumSubElts = SubVT.getVectorNumElements();
3432 for (unsigned i = 0; i != NumSubVecs; ++i) {
3433 SDValue SubOp = Op.getOperand(i);
3434 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3435 APInt SubUndef, SubZero;
3436 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3437 Depth + 1))
3438 return true;
3439 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3440 KnownZero.insertBits(SubZero, i * NumSubElts);
3441 }
3442
3443 // Attempt to avoid multi-use ops if we don't need anything from them.
3444 if (!DemandedElts.isAllOnes()) {
3445 bool FoundNewSub = false;
3446 SmallVector<SDValue, 2> DemandedSubOps;
3447 for (unsigned i = 0; i != NumSubVecs; ++i) {
3448 SDValue SubOp = Op.getOperand(i);
3449 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3451 SubOp, SubElts, TLO.DAG, Depth + 1);
3452 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3453 FoundNewSub = NewSubOp ? true : FoundNewSub;
3454 }
3455 if (FoundNewSub) {
3456 SDValue NewOp =
3457 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3458 return TLO.CombineTo(Op, NewOp);
3459 }
3460 }
3461 break;
3462 }
3463 case ISD::INSERT_SUBVECTOR: {
3464 // Demand any elements from the subvector and the remainder from the src it
3465 // is inserted into.
3466 SDValue Src = Op.getOperand(0);
3467 SDValue Sub = Op.getOperand(1);
3468 uint64_t Idx = Op.getConstantOperandVal(2);
3469 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3470 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3471 APInt DemandedSrcElts = DemandedElts;
3472 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3473
3474 // If none of the sub operand elements are demanded, bypass the insert.
3475 if (!DemandedSubElts)
3476 return TLO.CombineTo(Op, Src);
3477
3478 APInt SubUndef, SubZero;
3479 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3480 Depth + 1))
3481 return true;
3482
3483 // If none of the src operand elements are demanded, replace it with undef.
3484 if (!DemandedSrcElts && !Src.isUndef())
3485 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3486 TLO.DAG.getUNDEF(VT), Sub,
3487 Op.getOperand(2)));
3488
3489 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3490 TLO, Depth + 1))
3491 return true;
3492 KnownUndef.insertBits(SubUndef, Idx);
3493 KnownZero.insertBits(SubZero, Idx);
3494
3495 // Attempt to avoid multi-use ops if we don't need anything from them.
3496 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3498 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3500 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3501 if (NewSrc || NewSub) {
3502 NewSrc = NewSrc ? NewSrc : Src;
3503 NewSub = NewSub ? NewSub : Sub;
3504 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3505 NewSub, Op.getOperand(2));
3506 return TLO.CombineTo(Op, NewOp);
3507 }
3508 }
3509 break;
3510 }
3512 // Offset the demanded elts by the subvector index.
3513 SDValue Src = Op.getOperand(0);
3514 if (Src.getValueType().isScalableVector())
3515 break;
3516 uint64_t Idx = Op.getConstantOperandVal(1);
3517 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3518 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3519
3520 APInt SrcUndef, SrcZero;
3521 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3522 Depth + 1))
3523 return true;
3524 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3525 KnownZero = SrcZero.extractBits(NumElts, Idx);
3526
3527 // Attempt to avoid multi-use ops if we don't need anything from them.
3528 if (!DemandedElts.isAllOnes()) {
3530 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3531 if (NewSrc) {
3532 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3533 Op.getOperand(1));
3534 return TLO.CombineTo(Op, NewOp);
3535 }
3536 }
3537 break;
3538 }
3540 SDValue Vec = Op.getOperand(0);
3541 SDValue Scl = Op.getOperand(1);
3542 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3543
3544 // For a legal, constant insertion index, if we don't need this insertion
3545 // then strip it, else remove it from the demanded elts.
3546 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3547 unsigned Idx = CIdx->getZExtValue();
3548 if (!DemandedElts[Idx])
3549 return TLO.CombineTo(Op, Vec);
3550
3551 APInt DemandedVecElts(DemandedElts);
3552 DemandedVecElts.clearBit(Idx);
3553 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3554 KnownZero, TLO, Depth + 1))
3555 return true;
3556
3557 KnownUndef.setBitVal(Idx, Scl.isUndef());
3558
3559 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3560 break;
3561 }
3562
3563 APInt VecUndef, VecZero;
3564 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3565 Depth + 1))
3566 return true;
3567 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3568 break;
3569 }
3570 case ISD::VSELECT: {
3571 SDValue Sel = Op.getOperand(0);
3572 SDValue LHS = Op.getOperand(1);
3573 SDValue RHS = Op.getOperand(2);
3574
3575 // Try to transform the select condition based on the current demanded
3576 // elements.
3577 APInt UndefSel, ZeroSel;
3578 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3579 Depth + 1))
3580 return true;
3581
3582 // See if we can simplify either vselect operand.
3583 APInt DemandedLHS(DemandedElts);
3584 APInt DemandedRHS(DemandedElts);
3585 APInt UndefLHS, ZeroLHS;
3586 APInt UndefRHS, ZeroRHS;
3587 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3588 Depth + 1))
3589 return true;
3590 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3591 Depth + 1))
3592 return true;
3593
3594 KnownUndef = UndefLHS & UndefRHS;
3595 KnownZero = ZeroLHS & ZeroRHS;
3596
3597 // If we know that the selected element is always zero, we don't need the
3598 // select value element.
3599 APInt DemandedSel = DemandedElts & ~KnownZero;
3600 if (DemandedSel != DemandedElts)
3601 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3602 Depth + 1))
3603 return true;
3604
3605 break;
3606 }
3607 case ISD::VECTOR_SHUFFLE: {
3608 SDValue LHS = Op.getOperand(0);
3609 SDValue RHS = Op.getOperand(1);
3610 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3611
 3612 // Collect demanded elements from shuffle operands.
3613 APInt DemandedLHS(NumElts, 0);
3614 APInt DemandedRHS(NumElts, 0);
3615 for (unsigned i = 0; i != NumElts; ++i) {
3616 int M = ShuffleMask[i];
3617 if (M < 0 || !DemandedElts[i])
3618 continue;
3619 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3620 if (M < (int)NumElts)
3621 DemandedLHS.setBit(M);
3622 else
3623 DemandedRHS.setBit(M - NumElts);
3624 }
3625
3626 // If either side isn't demanded, replace it by UNDEF. We handle this
3627 // explicitly here to also simplify in case of multiple uses (on the
3628 // contrary to the SimplifyDemandedVectorElts calls below).
3629 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3630 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3631 if (FoldLHS || FoldRHS) {
3632 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3633 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3634 SDValue NewOp =
3635 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3636 return TLO.CombineTo(Op, NewOp);
3637 }
3638
3639 // See if we can simplify either shuffle operand.
3640 APInt UndefLHS, ZeroLHS;
3641 APInt UndefRHS, ZeroRHS;
3642 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3643 Depth + 1))
3644 return true;
3645 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3646 Depth + 1))
3647 return true;
3648
3649 // Simplify mask using undef elements from LHS/RHS.
3650 bool Updated = false;
3651 bool IdentityLHS = true, IdentityRHS = true;
3652 SmallVector<int, 32> NewMask(ShuffleMask);
3653 for (unsigned i = 0; i != NumElts; ++i) {
3654 int &M = NewMask[i];
3655 if (M < 0)
3656 continue;
3657 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3658 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3659 Updated = true;
3660 M = -1;
3661 }
3662 IdentityLHS &= (M < 0) || (M == (int)i);
3663 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3664 }
3665
3666 // Update legal shuffle masks based on demanded elements if it won't reduce
3667 // to Identity which can cause premature removal of the shuffle mask.
3668 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3669 SDValue LegalShuffle =
3670 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3671 if (LegalShuffle)
3672 return TLO.CombineTo(Op, LegalShuffle);
3673 }
3674
3675 // Propagate undef/zero elements from LHS/RHS.
3676 for (unsigned i = 0; i != NumElts; ++i) {
3677 int M = ShuffleMask[i];
3678 if (M < 0) {
3679 KnownUndef.setBit(i);
3680 } else if (M < (int)NumElts) {
3681 if (UndefLHS[M])
3682 KnownUndef.setBit(i);
3683 if (ZeroLHS[M])
3684 KnownZero.setBit(i);
3685 } else {
3686 if (UndefRHS[M - NumElts])
3687 KnownUndef.setBit(i);
3688 if (ZeroRHS[M - NumElts])
3689 KnownZero.setBit(i);
3690 }
3691 }
3692 break;
3693 }
3697 APInt SrcUndef, SrcZero;
3698 SDValue Src = Op.getOperand(0);
3699 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3700 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3701 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3702 Depth + 1))
3703 return true;
3704 KnownZero = SrcZero.zextOrTrunc(NumElts);
3705 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3706
3707 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3708 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3709 DemandedSrcElts == 1) {
3710 // aext - if we just need the bottom element then we can bitcast.
3711 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3712 }
3713
3714 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3715 // zext(undef) upper bits are guaranteed to be zero.
3716 if (DemandedElts.isSubsetOf(KnownUndef))
3717 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3718 KnownUndef.clearAllBits();
3719
3720 // zext - if we just need the bottom element then we can mask:
3721 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3722 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3723 Op->isOnlyUserOf(Src.getNode()) &&
3724 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3725 SDLoc DL(Op);
3726 EVT SrcVT = Src.getValueType();
3727 EVT SrcSVT = SrcVT.getScalarType();
3728
3729 // If we're after type legalization and SrcSVT is not legal, use the
3730 // promoted type for creating constants to avoid creating nodes with
3731 // illegal types.
3733 SrcSVT = getLegalTypeToTransformTo(*TLO.DAG.getContext(), SrcSVT);
3734
3735 SmallVector<SDValue> MaskElts;
3736 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3737 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3738 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3739 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3740 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3741 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3742 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3743 }
3744 }
3745 }
3746 break;
3747 }
3748
3749 // TODO: There are more binop opcodes that could be handled here - MIN,
3750 // MAX, saturated math, etc.
3751 case ISD::ADD: {
3752 SDValue Op0 = Op.getOperand(0);
3753 SDValue Op1 = Op.getOperand(1);
3754 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3755 APInt UndefLHS, ZeroLHS;
3756 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3757 Depth + 1, /*AssumeSingleUse*/ true))
3758 return true;
3759 }
3760 [[fallthrough]];
3761 }
3762 case ISD::AVGCEILS:
3763 case ISD::AVGCEILU:
3764 case ISD::AVGFLOORS:
3765 case ISD::AVGFLOORU:
3766 case ISD::OR:
3767 case ISD::XOR:
3768 case ISD::SUB:
3769 case ISD::FADD:
3770 case ISD::FSUB:
3771 case ISD::FMUL:
3772 case ISD::FDIV:
3773 case ISD::FREM: {
3774 SDValue Op0 = Op.getOperand(0);
3775 SDValue Op1 = Op.getOperand(1);
3776
3777 APInt UndefRHS, ZeroRHS;
3778 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3779 Depth + 1))
3780 return true;
3781 APInt UndefLHS, ZeroLHS;
3782 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3783 Depth + 1))
3784 return true;
3785
3786 KnownZero = ZeroLHS & ZeroRHS;
3787 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3788
3789 // Attempt to avoid multi-use ops if we don't need anything from them.
3790 // TODO - use KnownUndef to relax the demandedelts?
3791 if (!DemandedElts.isAllOnes())
3792 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3793 return true;
3794 break;
3795 }
3796 case ISD::SHL:
3797 case ISD::SRL:
3798 case ISD::SRA:
3799 case ISD::ROTL:
3800 case ISD::ROTR: {
3801 SDValue Op0 = Op.getOperand(0);
3802 SDValue Op1 = Op.getOperand(1);
3803
3804 APInt UndefRHS, ZeroRHS;
3805 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3806 Depth + 1))
3807 return true;
3808 APInt UndefLHS, ZeroLHS;
3809 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3810 Depth + 1))
3811 return true;
3812
3813 KnownZero = ZeroLHS;
3814 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3815
3816 // Attempt to avoid multi-use ops if we don't need anything from them.
3817 // TODO - use KnownUndef to relax the demandedelts?
3818 if (!DemandedElts.isAllOnes())
3819 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3820 return true;
3821 break;
3822 }
3823 case ISD::MUL:
3824 case ISD::MULHU:
3825 case ISD::MULHS:
3826 case ISD::AND: {
3827 SDValue Op0 = Op.getOperand(0);
3828 SDValue Op1 = Op.getOperand(1);
3829
3830 APInt SrcUndef, SrcZero;
3831 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3832 Depth + 1))
3833 return true;
3834 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3835 // to demand it in Op0 - its guaranteed to be zero. There is however a
3836 // restriction, as we must not make any of the originally demanded elements
3837 // more poisonous. We could reduce amount of elements demanded, but then we
 3838 // also need to inform SimplifyDemandedVectorElts that some elements must
3839 // not be made more poisonous.
3840 if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3841 TLO, Depth + 1))
3842 return true;
3843
3844 KnownUndef &= DemandedElts;
3845 KnownZero &= DemandedElts;
3846
3847 // If every element pair has a zero/undef/poison then just fold to zero.
3848 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3849 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3850 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3851 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3852
3853 // If either side has a zero element, then the result element is zero, even
3854 // if the other is an UNDEF.
3855 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3856 // and then handle 'and' nodes with the rest of the binop opcodes.
3857 KnownZero |= SrcZero;
3858 KnownUndef &= SrcUndef;
3859 KnownUndef &= ~KnownZero;
3860
3861 // Attempt to avoid multi-use ops if we don't need anything from them.
3862 if (!DemandedElts.isAllOnes())
3863 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3864 return true;
3865 break;
3866 }
3867 case ISD::TRUNCATE:
3868 case ISD::SIGN_EXTEND:
3869 case ISD::ZERO_EXTEND:
3870 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3871 KnownZero, TLO, Depth + 1))
3872 return true;
3873
3874 if (!DemandedElts.isAllOnes())
3876 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3877 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3878
3879 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3880 // zext(undef) upper bits are guaranteed to be zero.
3881 if (DemandedElts.isSubsetOf(KnownUndef))
3882 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3883 KnownUndef.clearAllBits();
3884 }
3885 break;
3886 case ISD::SINT_TO_FP:
3887 case ISD::UINT_TO_FP:
3888 case ISD::FP_TO_SINT:
3889 case ISD::FP_TO_UINT:
3890 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3891 KnownZero, TLO, Depth + 1))
3892 return true;
3893 // Don't fall through to generic undef -> undef handling.
3894 return false;
3895 default: {
3896 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3897 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3898 KnownZero, TLO, Depth))
3899 return true;
3900 } else {
3901 KnownBits Known;
3902 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3903 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3904 TLO, Depth, AssumeSingleUse))
3905 return true;
3906 }
3907 break;
3908 }
3909 }
3910 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3911
3912 // Constant fold all undef cases.
3913 // TODO: Handle zero cases as well.
3914 if (DemandedElts.isSubsetOf(KnownUndef))
3915 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3916
3917 return false;
3918}
3919
3920/// Determine which of the bits specified in Mask are known to be either zero or
3921/// one and return them in the Known.
3923 KnownBits &Known,
3924 const APInt &DemandedElts,
3925 const SelectionDAG &DAG,
3926 unsigned Depth) const {
3927 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3928 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3929 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3930 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3931 "Should use MaskedValueIsZero if you don't know whether Op"
3932 " is a target node!");
3933 Known.resetAll();
3934}
3935
3938 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3939 unsigned Depth) const {
3940 Known.resetAll();
3941}
3942
3945 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3946 unsigned Depth) const {
3947 Known.resetAll();
3948}
3949
3951 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3952 // The low bits are known zero if the pointer is aligned.
3953 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3954}
3955
3961
3962/// This method can be implemented by targets that want to expose additional
3963/// information about sign bits to the DAG Combiner.
3965 const APInt &,
3966 const SelectionDAG &,
3967 unsigned Depth) const {
3968 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3969 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3970 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3971 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3972 "Should use ComputeNumSignBits if you don't know whether Op"
3973 " is a target node!");
3974 return 1;
3975}
3976
3978 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3979 const MachineRegisterInfo &MRI, unsigned Depth) const {
3980 return 1;
3981}
3982
3984 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3985 TargetLoweringOpt &TLO, unsigned Depth) const {
3986 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3987 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3988 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3989 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3990 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3991 " is a target node!");
3992 return false;
3993}
3994
3996 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3997 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3998 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3999 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4000 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4001 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4002 "Should use SimplifyDemandedBits if you don't know whether Op"
4003 " is a target node!");
4004 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
4005 return false;
4006}
4007
4009 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4010 SelectionDAG &DAG, unsigned Depth) const {
4011 assert(
4012 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4013 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4014 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4015 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4016 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4017 " is a target node!");
4018 return SDValue();
4019}
4020
4021SDValue
4024 SelectionDAG &DAG) const {
4025 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4026 if (!LegalMask) {
4027 std::swap(N0, N1);
4029 LegalMask = isShuffleMaskLegal(Mask, VT);
4030 }
4031
4032 if (!LegalMask)
4033 return SDValue();
4034
4035 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4036}
4037
4039 return nullptr;
4040}
4041
4043 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4044 bool PoisonOnly, unsigned Depth) const {
4045 assert(
4046 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4047 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4048 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4049 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4050 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4051 " is a target node!");
4052
4053 // If Op can't create undef/poison and none of its operands are undef/poison
4054 // then Op is never undef/poison.
4055 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
4056 /*ConsiderFlags*/ true, Depth) &&
4057 all_of(Op->ops(), [&](SDValue V) {
4058 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
4059 Depth + 1);
4060 });
4061}
4062
4064 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4065 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
4066 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4067 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4068 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4069 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4070 "Should use canCreateUndefOrPoison if you don't know whether Op"
4071 " is a target node!");
4072 // Be conservative and return true.
4073 return true;
4074}
4075
4077 const APInt &DemandedElts,
4078 const SelectionDAG &DAG,
4079 bool SNaN,
4080 unsigned Depth) const {
4081 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4082 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4083 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4084 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4085 "Should use isKnownNeverNaN if you don't know whether Op"
4086 " is a target node!");
4087 return false;
4088}
4089
4091 const APInt &DemandedElts,
4092 APInt &UndefElts,
4093 const SelectionDAG &DAG,
4094 unsigned Depth) const {
4095 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4096 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4097 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4098 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4099 "Should use isSplatValue if you don't know whether Op"
4100 " is a target node!");
4101 return false;
4102}
4103
4104// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4105// work with truncating build vectors and vectors with elements of less than
4106// 8 bits.
4108 if (!N)
4109 return false;
4110
4111 unsigned EltWidth;
4112 APInt CVal;
4113 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4114 /*AllowTruncation=*/true)) {
4115 CVal = CN->getAPIntValue();
4116 EltWidth = N.getValueType().getScalarSizeInBits();
4117 } else
4118 return false;
4119
4120 // If this is a truncating splat, truncate the splat value.
4121 // Otherwise, we may fail to match the expected values below.
4122 if (EltWidth < CVal.getBitWidth())
4123 CVal = CVal.trunc(EltWidth);
4124
4125 switch (getBooleanContents(N.getValueType())) {
4127 return CVal[0];
4129 return CVal.isOne();
4131 return CVal.isAllOnes();
4132 }
4133
4134 llvm_unreachable("Invalid boolean contents");
4135}
4136
4138 if (!N)
4139 return false;
4140
4142 if (!CN) {
4144 if (!BV)
4145 return false;
4146
4147 // Only interested in constant splats, we don't care about undef
4148 // elements in identifying boolean constants and getConstantSplatNode
4149 // returns NULL if all ops are undef;
4150 CN = BV->getConstantSplatNode();
4151 if (!CN)
4152 return false;
4153 }
4154
4155 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4156 return !CN->getAPIntValue()[0];
4157
4158 return CN->isZero();
4159}
4160
4162 bool SExt) const {
4163 if (VT == MVT::i1)
4164 return N->isOne();
4165
4167 switch (Cnt) {
4169 // An extended value of 1 is always true, unless its original type is i1,
4170 // in which case it will be sign extended to -1.
4171 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4174 return N->isAllOnes() && SExt;
4175 }
4176 llvm_unreachable("Unexpected enumeration.");
4177}
4178
4179/// This helper function of SimplifySetCC tries to optimize the comparison when
4180/// either operand of the SetCC node is a bitwise-and instruction.
4181SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4182 ISD::CondCode Cond, const SDLoc &DL,
4183 DAGCombinerInfo &DCI) const {
4184 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4185 std::swap(N0, N1);
4186
4187 SelectionDAG &DAG = DCI.DAG;
4188 EVT OpVT = N0.getValueType();
4189 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4190 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4191 return SDValue();
4192
4193 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4194 // iff everything but LSB is known zero:
4195 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4198 unsigned NumEltBits = OpVT.getScalarSizeInBits();
4199 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4200 if (DAG.MaskedValueIsZero(N0, UpperBits))
4201 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4202 }
4203
4204 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4205 // test in a narrow type that we can truncate to with no cost. Examples:
4206 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4207 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4208 // TODO: This conservatively checks for type legality on the source and
4209 // destination types. That may inhibit optimizations, but it also
4210 // allows setcc->shift transforms that may be more beneficial.
4211 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4212 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4213 isTypeLegal(OpVT) && N0.hasOneUse()) {
4214 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4215 AndC->getAPIntValue().getActiveBits());
4216 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4217 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4218 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4219 return DAG.getSetCC(DL, VT, Trunc, Zero,
4221 }
4222 }
4223
4224 // Match these patterns in any of their permutations:
4225 // (X & Y) == Y
4226 // (X & Y) != Y
4227 SDValue X, Y;
4228 if (N0.getOperand(0) == N1) {
4229 X = N0.getOperand(1);
4230 Y = N0.getOperand(0);
4231 } else if (N0.getOperand(1) == N1) {
4232 X = N0.getOperand(0);
4233 Y = N0.getOperand(1);
4234 } else {
4235 return SDValue();
4236 }
4237
4238 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4239 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
 4240 // it's liable to create an infinite loop.
4241 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4242 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4244 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4245 // Note that where Y is variable and is known to have at most one bit set
4246 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4247 // equivalent when Y == 0.
4248 assert(OpVT.isInteger());
4250 if (DCI.isBeforeLegalizeOps() ||
4252 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4253 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4254 // If the target supports an 'and-not' or 'and-complement' logic operation,
4255 // try to use that to make a comparison operation more efficient.
4256 // But don't do this transform if the mask is a single bit because there are
4257 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4258 // 'rlwinm' on PPC).
4259
4260 // Bail out if the compare operand that we want to turn into a zero is
4261 // already a zero (otherwise, infinite loop).
4262 if (isNullConstant(Y))
4263 return SDValue();
4264
4265 // Transform this into: ~X & Y == 0.
4266 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4267 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4268 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4269 }
4270
4271 return SDValue();
4272}
4273
4274/// This helper function of SimplifySetCC tries to optimize the comparison when
4275/// either operand of the SetCC node is a bitwise-or instruction.
4276/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4277SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4278 ISD::CondCode Cond, const SDLoc &DL,
4279 DAGCombinerInfo &DCI) const {
4280 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4281 std::swap(N0, N1);
4282
4283 SelectionDAG &DAG = DCI.DAG;
4284 EVT OpVT = N0.getValueType();
4285 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4286 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4287 return SDValue();
4288
4289 // (X | Y) == Y
4290 // (X | Y) != Y
4291 SDValue X;
4292 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4293 // If the target supports an 'and-not' or 'and-complement' logic operation,
4294 // try to use that to make a comparison operation more efficient.
4295
4296 // Bail out if the compare operand that we want to turn into a zero is
4297 // already a zero (otherwise, infinite loop).
4298 if (isNullConstant(N1))
4299 return SDValue();
4300
4301 // Transform this into: X & ~Y ==/!= 0.
4302 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4303 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4304 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4305 }
4306
4307 return SDValue();
4308}
4309
4310/// There are multiple IR patterns that could be checking whether certain
4311/// truncation of a signed number would be lossy or not. The pattern which is
4312/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4313/// We are looking for the following pattern: (KeptBits is a constant)
4314/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4315/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4316/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4317/// We will unfold it into the natural trunc+sext pattern:
4318/// ((%x << C) a>> C) dstcond %x
4319/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4320SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4321 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4322 const SDLoc &DL) const {
4323 // We must be comparing with a constant.
4324 ConstantSDNode *C1;
4325 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4326 return SDValue();
4327
4328 // N0 should be: add %x, (1 << (KeptBits-1))
4329 if (N0->getOpcode() != ISD::ADD)
4330 return SDValue();
4331
4332 // And we must be 'add'ing a constant.
4333 ConstantSDNode *C01;
4334 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4335 return SDValue();
4336
4337 SDValue X = N0->getOperand(0);
4338 EVT XVT = X.getValueType();
4339
4340 // Validate constants ...
4341
4342 APInt I1 = C1->getAPIntValue();
4343
4344 ISD::CondCode NewCond;
4345 if (Cond == ISD::CondCode::SETULT) {
4346 NewCond = ISD::CondCode::SETEQ;
4347 } else if (Cond == ISD::CondCode::SETULE) {
4348 NewCond = ISD::CondCode::SETEQ;
4349 // But need to 'canonicalize' the constant.
4350 I1 += 1;
4351 } else if (Cond == ISD::CondCode::SETUGT) {
4352 NewCond = ISD::CondCode::SETNE;
4353 // But need to 'canonicalize' the constant.
4354 I1 += 1;
4355 } else if (Cond == ISD::CondCode::SETUGE) {
4356 NewCond = ISD::CondCode::SETNE;
4357 } else
4358 return SDValue();
4359
4360 APInt I01 = C01->getAPIntValue();
4361
4362 auto checkConstants = [&I1, &I01]() -> bool {
4363 // Both of them must be power-of-two, and the constant from setcc is bigger.
4364 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4365 };
4366
4367 if (checkConstants()) {
4368 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4369 } else {
4370 // What if we invert constants? (and the target predicate)
4371 I1.negate();
4372 I01.negate();
4373 assert(XVT.isInteger());
4374 NewCond = getSetCCInverse(NewCond, XVT);
4375 if (!checkConstants())
4376 return SDValue();
4377 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4378 }
4379
4380 // They are power-of-two, so which bit is set?
4381 const unsigned KeptBits = I1.logBase2();
4382 const unsigned KeptBitsMinusOne = I01.logBase2();
4383
4384 // Magic!
4385 if (KeptBits != (KeptBitsMinusOne + 1))
4386 return SDValue();
4387 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4388
4389 // We don't want to do this in every single case.
4390 SelectionDAG &DAG = DCI.DAG;
4391 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4392 return SDValue();
4393
4394 // Unfold into: sext_inreg(%x) cond %x
4395 // Where 'cond' will be either 'eq' or 'ne'.
4396 SDValue SExtInReg = DAG.getNode(
4398 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4399 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4400}
4401
4402// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4403SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4404 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4405 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4407 "Should be a comparison with 0.");
4408 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4409 "Valid only for [in]equality comparisons.");
4410
4411 unsigned NewShiftOpcode;
4412 SDValue X, C, Y;
4413
4414 SelectionDAG &DAG = DCI.DAG;
4415
4416 // Look for '(C l>>/<< Y)'.
4417 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4418 // The shift should be one-use.
4419 if (!V.hasOneUse())
4420 return false;
4421 unsigned OldShiftOpcode = V.getOpcode();
4422 switch (OldShiftOpcode) {
4423 case ISD::SHL:
4424 NewShiftOpcode = ISD::SRL;
4425 break;
4426 case ISD::SRL:
4427 NewShiftOpcode = ISD::SHL;
4428 break;
4429 default:
4430 return false; // must be a logical shift.
4431 }
4432 // We should be shifting a constant.
4433 // FIXME: best to use isConstantOrConstantVector().
4434 C = V.getOperand(0);
4435 ConstantSDNode *CC =
4436 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4437 if (!CC)
4438 return false;
4439 Y = V.getOperand(1);
4440
4441 ConstantSDNode *XC =
4442 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4444 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4445 };
4446
4447 // LHS of comparison should be an one-use 'and'.
4448 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4449 return SDValue();
4450
4451 X = N0.getOperand(0);
4452 SDValue Mask = N0.getOperand(1);
4453
4454 // 'and' is commutative!
4455 if (!Match(Mask)) {
4456 std::swap(X, Mask);
4457 if (!Match(Mask))
4458 return SDValue();
4459 }
4460
4461 EVT VT = X.getValueType();
4462
4463 // Produce:
4464 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4465 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4466 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4467 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4468 return T2;
4469}
4470
4471/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4472/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4473/// handle the commuted versions of these patterns.
4474SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4475 ISD::CondCode Cond, const SDLoc &DL,
4476 DAGCombinerInfo &DCI) const {
4477 unsigned BOpcode = N0.getOpcode();
4478 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4479 "Unexpected binop");
4480 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4481
4482 // (X + Y) == X --> Y == 0
4483 // (X - Y) == X --> Y == 0
4484 // (X ^ Y) == X --> Y == 0
4485 SelectionDAG &DAG = DCI.DAG;
4486 EVT OpVT = N0.getValueType();
4487 SDValue X = N0.getOperand(0);
4488 SDValue Y = N0.getOperand(1);
4489 if (X == N1)
4490 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4491
4492 if (Y != N1)
4493 return SDValue();
4494
4495 // (X + Y) == Y --> X == 0
4496 // (X ^ Y) == Y --> X == 0
4497 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4498 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4499
4500 // The shift would not be valid if the operands are boolean (i1).
4501 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4502 return SDValue();
4503
4504 // (X - Y) == Y --> X == Y << 1
4505 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4506 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4507 if (!DCI.isCalledByLegalizer())
4508 DCI.AddToWorklist(YShl1.getNode());
4509 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4510}
4511
4513 SDValue N0, const APInt &C1,
4514 ISD::CondCode Cond, const SDLoc &dl,
4515 SelectionDAG &DAG) {
4516 // Look through truncs that don't change the value of a ctpop.
4517 // FIXME: Add vector support? Need to be careful with setcc result type below.
4518 SDValue CTPOP = N0;
4519 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4521 CTPOP = N0.getOperand(0);
4522
4523 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4524 return SDValue();
4525
4526 EVT CTVT = CTPOP.getValueType();
4527 SDValue CTOp = CTPOP.getOperand(0);
4528
4529 // Expand a power-of-2-or-zero comparison based on ctpop:
4530 // (ctpop x) u< 2 -> (x & x-1) == 0
4531 // (ctpop x) u> 1 -> (x & x-1) != 0
4532 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4533 // Keep the CTPOP if it is a cheap vector op.
4534 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4535 return SDValue();
4536
4537 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4538 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4539 return SDValue();
4540 if (C1 == 0 && (Cond == ISD::SETULT))
4541 return SDValue(); // This is handled elsewhere.
4542
4543 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4544
4545 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4546 SDValue Result = CTOp;
4547 for (unsigned i = 0; i < Passes; i++) {
4548 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4549 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4550 }
4552 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4553 }
4554
4555 // Expand a power-of-2 comparison based on ctpop
4556 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4557 // Keep the CTPOP if it is cheap.
4558 if (TLI.isCtpopFast(CTVT))
4559 return SDValue();
4560
4561 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4562 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4563 assert(CTVT.isInteger());
4564 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4565
4566 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4567 // check before emitting a potentially unnecessary op.
4568 if (DAG.isKnownNeverZero(CTOp)) {
4569 // (ctpop x) == 1 --> (x & x-1) == 0
4570 // (ctpop x) != 1 --> (x & x-1) != 0
4571 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4572 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4573 return RHS;
4574 }
4575
4576 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4577 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4578 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4580 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4581 }
4582
4583 return SDValue();
4584}
4585
4587 ISD::CondCode Cond, const SDLoc &dl,
4588 SelectionDAG &DAG) {
4589 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4590 return SDValue();
4591
4592 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4593 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4594 return SDValue();
4595
4596 auto getRotateSource = [](SDValue X) {
4597 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4598 return X.getOperand(0);
4599 return SDValue();
4600 };
4601
4602 // Peek through a rotated value compared against 0 or -1:
4603 // (rot X, Y) == 0/-1 --> X == 0/-1
4604 // (rot X, Y) != 0/-1 --> X != 0/-1
4605 if (SDValue R = getRotateSource(N0))
4606 return DAG.getSetCC(dl, VT, R, N1, Cond);
4607
4608 // Peek through an 'or' of a rotated value compared against 0:
4609 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4610 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4611 //
4612 // TODO: Add the 'and' with -1 sibling.
4613 // TODO: Recurse through a series of 'or' ops to find the rotate.
4614 EVT OpVT = N0.getValueType();
4615 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4616 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4617 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4618 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4619 }
4620 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4621 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4622 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4623 }
4624 }
4625
4626 return SDValue();
4627}
4628
4630 ISD::CondCode Cond, const SDLoc &dl,
4631 SelectionDAG &DAG) {
4632 // If we are testing for all-bits-clear, we might be able to do that with
4633 // less shifting since bit-order does not matter.
4634 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4635 return SDValue();
4636
4637 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4638 if (!C1 || !C1->isZero())
4639 return SDValue();
4640
4641 if (!N0.hasOneUse() ||
4642 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4643 return SDValue();
4644
4645 unsigned BitWidth = N0.getScalarValueSizeInBits();
4646 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4647 if (!ShAmtC)
4648 return SDValue();
4649
4650 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4651 if (ShAmt == 0)
4652 return SDValue();
4653
4654 // Canonicalize fshr as fshl to reduce pattern-matching.
4655 if (N0.getOpcode() == ISD::FSHR)
4656 ShAmt = BitWidth - ShAmt;
4657
4658 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4659 SDValue X, Y;
4660 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4661 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4662 return false;
4663 if (Or.getOperand(0) == Other) {
4664 X = Or.getOperand(0);
4665 Y = Or.getOperand(1);
4666 return true;
4667 }
4668 if (Or.getOperand(1) == Other) {
4669 X = Or.getOperand(1);
4670 Y = Or.getOperand(0);
4671 return true;
4672 }
4673 return false;
4674 };
4675
4676 EVT OpVT = N0.getValueType();
4677 EVT ShAmtVT = N0.getOperand(2).getValueType();
4678 SDValue F0 = N0.getOperand(0);
4679 SDValue F1 = N0.getOperand(1);
4680 if (matchOr(F0, F1)) {
4681 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4682 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4683 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4684 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4685 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4686 }
4687 if (matchOr(F1, F0)) {
4688 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4689 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4690 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4691 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4692 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4693 }
4694
4695 return SDValue();
4696}
4697
4698/// Try to simplify a setcc built with the specified operands and cc. If it is
4699/// unable to simplify it, return a null SDValue.
4701 ISD::CondCode Cond, bool foldBooleans,
4702 DAGCombinerInfo &DCI,
4703 const SDLoc &dl) const {
4704 SelectionDAG &DAG = DCI.DAG;
4705 const DataLayout &Layout = DAG.getDataLayout();
4706 EVT OpVT = N0.getValueType();
4707 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4708
4709 // Constant fold or commute setcc.
4710 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4711 return Fold;
4712
4713 bool N0ConstOrSplat =
4714 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4715 bool N1ConstOrSplat =
4716 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4717
4718 // Canonicalize toward having the constant on the RHS.
4719 // TODO: Handle non-splat vector constants. All undef causes trouble.
4720 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4721 // infinite loop here when we encounter one.
4723 if (N0ConstOrSplat && !N1ConstOrSplat &&
4724 (DCI.isBeforeLegalizeOps() ||
4725 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4726 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4727
4728 // If we have a subtract with the same 2 non-constant operands as this setcc
4729 // -- but in reverse order -- then try to commute the operands of this setcc
4730 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4731 // instruction on some targets.
4732 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4733 (DCI.isBeforeLegalizeOps() ||
4734 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4735 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4736 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4737 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4738
4739 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4740 return V;
4741
4742 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4743 return V;
4744
4745 if (auto *N1C = isConstOrConstSplat(N1)) {
4746 const APInt &C1 = N1C->getAPIntValue();
4747
4748 // Optimize some CTPOP cases.
4749 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4750 return V;
4751
4752 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4753 // X * Y == 0 --> (X == 0) || (Y == 0)
4754 // X * Y != 0 --> (X != 0) && (Y != 0)
4755 // TODO: This bails out if minsize is set, but if the target doesn't have a
4756 // single instruction multiply for this type, it would likely be
4757 // smaller to decompose.
4758 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4759 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4760 (N0->getFlags().hasNoUnsignedWrap() ||
4761 N0->getFlags().hasNoSignedWrap()) &&
4762 !Attr.hasFnAttr(Attribute::MinSize)) {
4763 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4764 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4765 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4766 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4767 }
4768
4769 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4770 // equality comparison, then we're just comparing whether X itself is
4771 // zero.
4772 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4773 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4775 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4776 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4777 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4778 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4779 // (srl (ctlz x), 5) == 0 -> X != 0
4780 // (srl (ctlz x), 5) != 1 -> X != 0
4781 Cond = ISD::SETNE;
4782 } else {
4783 // (srl (ctlz x), 5) != 0 -> X == 0
4784 // (srl (ctlz x), 5) == 1 -> X == 0
4785 Cond = ISD::SETEQ;
4786 }
4787 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4788 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4789 Cond);
4790 }
4791 }
4792 }
4793 }
4794
4795 // setcc X, 0, setlt --> X (when X is all sign bits)
4796 // setcc X, 0, setne --> X (when X is all sign bits)
4797 //
4798 // When we know that X has 0 or -1 in each element (or scalar), this
4799 // comparison will produce X. This is only true when boolean contents are
4800 // represented via 0s and -1s.
4801 if (VT == OpVT &&
4802 // Check that the result of setcc is 0 and -1.
4804 // Match only for checks X < 0 and X != 0
4805 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4806 // The identity holds iff we know all sign bits for all lanes.
4808 return N0;
4809
4810 // FIXME: Support vectors.
4811 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4812 const APInt &C1 = N1C->getAPIntValue();
4813
4814 // (zext x) == C --> x == (trunc C)
4815 // (sext x) == C --> x == (trunc C)
4816 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4817 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4818 unsigned MinBits = N0.getValueSizeInBits();
4819 SDValue PreExt;
4820 bool Signed = false;
4821 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4822 // ZExt
4823 MinBits = N0->getOperand(0).getValueSizeInBits();
4824 PreExt = N0->getOperand(0);
4825 } else if (N0->getOpcode() == ISD::AND) {
4826 // DAGCombine turns costly ZExts into ANDs
4827 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4828 if ((C->getAPIntValue()+1).isPowerOf2()) {
4829 MinBits = C->getAPIntValue().countr_one();
4830 PreExt = N0->getOperand(0);
4831 }
4832 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4833 // SExt
4834 MinBits = N0->getOperand(0).getValueSizeInBits();
4835 PreExt = N0->getOperand(0);
4836 Signed = true;
4837 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4838 // ZEXTLOAD / SEXTLOAD
4839 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4840 MinBits = LN0->getMemoryVT().getSizeInBits();
4841 PreExt = N0;
4842 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4843 Signed = true;
4844 MinBits = LN0->getMemoryVT().getSizeInBits();
4845 PreExt = N0;
4846 }
4847 }
4848
4849 // Figure out how many bits we need to preserve this constant.
4850 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4851
4852 // Make sure we're not losing bits from the constant.
4853 if (MinBits > 0 &&
4854 MinBits < C1.getBitWidth() &&
4855 MinBits >= ReqdBits) {
4856 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4857 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4858 // Will get folded away.
4859 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4860 if (MinBits == 1 && C1 == 1)
4861 // Invert the condition.
4862 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4864 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4865 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4866 }
4867
4868 // If truncating the setcc operands is not desirable, we can still
4869 // simplify the expression in some cases:
4870 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4871 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4872 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4873 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4874 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4875 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4876 SDValue TopSetCC = N0->getOperand(0);
4877 unsigned N0Opc = N0->getOpcode();
4878 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4879 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4880 TopSetCC.getOpcode() == ISD::SETCC &&
4881 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4882 (isConstFalseVal(N1) ||
4883 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4884
4885 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4886 (!N1C->isZero() && Cond == ISD::SETNE);
4887
4888 if (!Inverse)
4889 return TopSetCC;
4890
4892 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4893 TopSetCC.getOperand(0).getValueType());
4894 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4895 TopSetCC.getOperand(1),
4896 InvCond);
4897 }
4898 }
4899 }
4900
4901 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4902 // equality or unsigned, and all 1 bits of the const are in the same
4903 // partial word, see if we can shorten the load.
4904 if (DCI.isBeforeLegalize() &&
4906 N0.getOpcode() == ISD::AND && C1 == 0 &&
4907 N0.getNode()->hasOneUse() &&
4908 isa<LoadSDNode>(N0.getOperand(0)) &&
4909 N0.getOperand(0).getNode()->hasOneUse() &&
4911 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4912 APInt bestMask;
4913 unsigned bestWidth = 0, bestOffset = 0;
4914 if (Lod->isSimple() && Lod->isUnindexed() &&
4915 (Lod->getMemoryVT().isByteSized() ||
4916 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4917 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4918 unsigned origWidth = N0.getValueSizeInBits();
4919 unsigned maskWidth = origWidth;
4920 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4921 // 8 bits, but have to be careful...
4922 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4923 origWidth = Lod->getMemoryVT().getSizeInBits();
4924 const APInt &Mask = N0.getConstantOperandAPInt(1);
4925 // Only consider power-of-2 widths (and at least one byte) as candiates
4926 // for the narrowed load.
4927 for (unsigned width = 8; width < origWidth; width *= 2) {
4928 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4929 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4930 // Avoid accessing any padding here for now (we could use memWidth
4931 // instead of origWidth here otherwise).
4932 unsigned maxOffset = origWidth - width;
4933 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4934 if (Mask.isSubsetOf(newMask)) {
4935 unsigned ptrOffset =
4936 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4937 unsigned IsFast = 0;
4938 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4939 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4941 ptrOffset / 8) &&
4943 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4944 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4945 IsFast) {
4946 bestOffset = ptrOffset / 8;
4947 bestMask = Mask.lshr(offset);
4948 bestWidth = width;
4949 break;
4950 }
4951 }
4952 newMask <<= 8;
4953 }
4954 if (bestWidth)
4955 break;
4956 }
4957 }
4958 if (bestWidth) {
4959 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4960 SDValue Ptr = Lod->getBasePtr();
4961 if (bestOffset != 0)
4962 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4963 SDValue NewLoad =
4964 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4965 Lod->getPointerInfo().getWithOffset(bestOffset),
4966 Lod->getBaseAlign());
4967 SDValue And =
4968 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4969 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4970 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4971 }
4972 }
4973
4974 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4975 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4976 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4977
4978 // If the comparison constant has bits in the upper part, the
4979 // zero-extended value could never match.
4981 C1.getBitWidth() - InSize))) {
4982 switch (Cond) {
4983 case ISD::SETUGT:
4984 case ISD::SETUGE:
4985 case ISD::SETEQ:
4986 return DAG.getConstant(0, dl, VT);
4987 case ISD::SETULT:
4988 case ISD::SETULE:
4989 case ISD::SETNE:
4990 return DAG.getConstant(1, dl, VT);
4991 case ISD::SETGT:
4992 case ISD::SETGE:
4993 // True if the sign bit of C1 is set.
4994 return DAG.getConstant(C1.isNegative(), dl, VT);
4995 case ISD::SETLT:
4996 case ISD::SETLE:
4997 // True if the sign bit of C1 isn't set.
4998 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4999 default:
5000 break;
5001 }
5002 }
5003
5004 // Otherwise, we can perform the comparison with the low bits.
5005 switch (Cond) {
5006 case ISD::SETEQ:
5007 case ISD::SETNE:
5008 case ISD::SETUGT:
5009 case ISD::SETUGE:
5010 case ISD::SETULT:
5011 case ISD::SETULE: {
5012 EVT newVT = N0.getOperand(0).getValueType();
5013 // FIXME: Should use isNarrowingProfitable.
5014 if (DCI.isBeforeLegalizeOps() ||
5015 (isOperationLegal(ISD::SETCC, newVT) &&
5016 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
5018 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
5019 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
5020
5021 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
5022 NewConst, Cond);
5023 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
5024 }
5025 break;
5026 }
5027 default:
5028 break; // todo, be more careful with signed comparisons
5029 }
5030 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5031 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5033 OpVT)) {
5034 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
5035 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5036 EVT ExtDstTy = N0.getValueType();
5037 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5038
5039 // If the constant doesn't fit into the number of bits for the source of
5040 // the sign extension, it is impossible for both sides to be equal.
5041 if (C1.getSignificantBits() > ExtSrcTyBits)
5042 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
5043
5044 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5045 ExtDstTy != ExtSrcTy && "Unexpected types!");
5046 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5047 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5048 DAG.getConstant(Imm, dl, ExtDstTy));
5049 if (!DCI.isCalledByLegalizer())
5050 DCI.AddToWorklist(ZextOp.getNode());
5051 // Otherwise, make this a use of a zext.
5052 return DAG.getSetCC(dl, VT, ZextOp,
5053 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5054 } else if ((N1C->isZero() || N1C->isOne()) &&
5055 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5056 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5057 // excluded as they are handled below whilst checking for foldBooleans.
5058 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5059 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5060 (N0.getValueType() == MVT::i1 ||
5064 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5065 if (TrueWhenTrue)
5066 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5067 // Invert the condition.
5068 if (N0.getOpcode() == ISD::SETCC) {
5071 if (DCI.isBeforeLegalizeOps() ||
5073 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5074 }
5075 }
5076
5077 if ((N0.getOpcode() == ISD::XOR ||
5078 (N0.getOpcode() == ISD::AND &&
5079 N0.getOperand(0).getOpcode() == ISD::XOR &&
5080 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5081 isOneConstant(N0.getOperand(1))) {
5082 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5083 // can only do this if the top bits are known zero.
5084 unsigned BitWidth = N0.getValueSizeInBits();
5085 if (DAG.MaskedValueIsZero(N0,
5087 BitWidth-1))) {
5088 // Okay, get the un-inverted input value.
5089 SDValue Val;
5090 if (N0.getOpcode() == ISD::XOR) {
5091 Val = N0.getOperand(0);
5092 } else {
5093 assert(N0.getOpcode() == ISD::AND &&
5094 N0.getOperand(0).getOpcode() == ISD::XOR);
5095 // ((X^1)&1)^1 -> X & 1
5096 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5097 N0.getOperand(0).getOperand(0),
5098 N0.getOperand(1));
5099 }
5100
5101 return DAG.getSetCC(dl, VT, Val, N1,
5103 }
5104 } else if (N1C->isOne()) {
5105 SDValue Op0 = N0;
5106 if (Op0.getOpcode() == ISD::TRUNCATE)
5107 Op0 = Op0.getOperand(0);
5108
5109 if ((Op0.getOpcode() == ISD::XOR) &&
5110 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5111 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5112 SDValue XorLHS = Op0.getOperand(0);
5113 SDValue XorRHS = Op0.getOperand(1);
5114 // Ensure that the input setccs return an i1 type or 0/1 value.
5115 if (Op0.getValueType() == MVT::i1 ||
5120 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5122 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5123 }
5124 }
5125 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5126 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5127 if (Op0.getValueType().bitsGT(VT))
5128 Op0 = DAG.getNode(ISD::AND, dl, VT,
5129 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5130 DAG.getConstant(1, dl, VT));
5131 else if (Op0.getValueType().bitsLT(VT))
5132 Op0 = DAG.getNode(ISD::AND, dl, VT,
5133 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5134 DAG.getConstant(1, dl, VT));
5135
5136 return DAG.getSetCC(dl, VT, Op0,
5137 DAG.getConstant(0, dl, Op0.getValueType()),
5139 }
5140 if (Op0.getOpcode() == ISD::AssertZext &&
5141 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5142 return DAG.getSetCC(dl, VT, Op0,
5143 DAG.getConstant(0, dl, Op0.getValueType()),
5145 }
5146 }
5147
5148 // Given:
5149 // icmp eq/ne (urem %x, %y), 0
5150 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5151 // icmp eq/ne %x, 0
5152 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5153 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5154 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5155 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5156 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5157 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5158 }
5159
5160 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5161 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5162 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5164 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5165 N1C->isAllOnes()) {
5166 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5167 DAG.getConstant(0, dl, OpVT),
5169 }
5170
5171 // fold (setcc (trunc x) c) -> (setcc x c)
5172 if (N0.getOpcode() == ISD::TRUNCATE &&
5174 (N0->getFlags().hasNoSignedWrap() &&
5177 EVT NewVT = N0.getOperand(0).getValueType();
5178 SDValue NewConst = DAG.getConstant(
5180 ? C1.sext(NewVT.getSizeInBits())
5181 : C1.zext(NewVT.getSizeInBits()),
5182 dl, NewVT);
5183 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5184 }
5185
5186 if (SDValue V =
5187 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5188 return V;
5189 }
5190
5191 // These simplifications apply to splat vectors as well.
5192 // TODO: Handle more splat vector cases.
5193 if (auto *N1C = isConstOrConstSplat(N1)) {
5194 const APInt &C1 = N1C->getAPIntValue();
5195
5196 APInt MinVal, MaxVal;
5197 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5199 MinVal = APInt::getSignedMinValue(OperandBitSize);
5200 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5201 } else {
5202 MinVal = APInt::getMinValue(OperandBitSize);
5203 MaxVal = APInt::getMaxValue(OperandBitSize);
5204 }
5205
5206 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5207 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5208 // X >= MIN --> true
5209 if (C1 == MinVal)
5210 return DAG.getBoolConstant(true, dl, VT, OpVT);
5211
5212 if (!VT.isVector()) { // TODO: Support this for vectors.
5213 // X >= C0 --> X > (C0 - 1)
5214 APInt C = C1 - 1;
5216 if ((DCI.isBeforeLegalizeOps() ||
5217 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5218 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5219 isLegalICmpImmediate(C.getSExtValue())))) {
5220 return DAG.getSetCC(dl, VT, N0,
5221 DAG.getConstant(C, dl, N1.getValueType()),
5222 NewCC);
5223 }
5224 }
5225 }
5226
5227 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5228 // X <= MAX --> true
5229 if (C1 == MaxVal)
5230 return DAG.getBoolConstant(true, dl, VT, OpVT);
5231
5232 // X <= C0 --> X < (C0 + 1)
5233 if (!VT.isVector()) { // TODO: Support this for vectors.
5234 APInt C = C1 + 1;
5236 if ((DCI.isBeforeLegalizeOps() ||
5237 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5238 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5239 isLegalICmpImmediate(C.getSExtValue())))) {
5240 return DAG.getSetCC(dl, VT, N0,
5241 DAG.getConstant(C, dl, N1.getValueType()),
5242 NewCC);
5243 }
5244 }
5245 }
5246
5247 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5248 if (C1 == MinVal)
5249 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5250
5251 // TODO: Support this for vectors after legalize ops.
5252 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5253 // Canonicalize setlt X, Max --> setne X, Max
5254 if (C1 == MaxVal)
5255 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5256
5257 // If we have setult X, 1, turn it into seteq X, 0
5258 if (C1 == MinVal+1)
5259 return DAG.getSetCC(dl, VT, N0,
5260 DAG.getConstant(MinVal, dl, N0.getValueType()),
5261 ISD::SETEQ);
5262 }
5263 }
5264
5265 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5266 if (C1 == MaxVal)
5267 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5268
5269 // TODO: Support this for vectors after legalize ops.
5270 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5271 // Canonicalize setgt X, Min --> setne X, Min
5272 if (C1 == MinVal)
5273 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5274
5275 // If we have setugt X, Max-1, turn it into seteq X, Max
5276 if (C1 == MaxVal-1)
5277 return DAG.getSetCC(dl, VT, N0,
5278 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5279 ISD::SETEQ);
5280 }
5281 }
5282
5283 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5284 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5285 if (C1.isZero())
5286 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5287 VT, N0, N1, Cond, DCI, dl))
5288 return CC;
5289
5290 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5291 // For example, when high 32-bits of i64 X are known clear:
5292 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5293 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5294 bool CmpZero = N1C->isZero();
5295 bool CmpNegOne = N1C->isAllOnes();
5296 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5297 // Match or(lo,shl(hi,bw/2)) pattern.
5298 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5299 unsigned EltBits = V.getScalarValueSizeInBits();
5300 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5301 return false;
5302 SDValue LHS = V.getOperand(0);
5303 SDValue RHS = V.getOperand(1);
5304 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5305 // Unshifted element must have zero upperbits.
5306 if (RHS.getOpcode() == ISD::SHL &&
5307 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5308 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5309 DAG.MaskedValueIsZero(LHS, HiBits)) {
5310 Lo = LHS;
5311 Hi = RHS.getOperand(0);
5312 return true;
5313 }
5314 if (LHS.getOpcode() == ISD::SHL &&
5315 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5316 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5317 DAG.MaskedValueIsZero(RHS, HiBits)) {
5318 Lo = RHS;
5319 Hi = LHS.getOperand(0);
5320 return true;
5321 }
5322 return false;
5323 };
5324
5325 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5326 unsigned EltBits = N0.getScalarValueSizeInBits();
5327 unsigned HalfBits = EltBits / 2;
5328 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5329 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5330 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5331 SDValue NewN0 =
5332 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5333 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5334 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5335 };
5336
5337 SDValue Lo, Hi;
5338 if (IsConcat(N0, Lo, Hi))
5339 return MergeConcat(Lo, Hi);
5340
5341 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5342 SDValue Lo0, Lo1, Hi0, Hi1;
5343 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5344 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5345 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5346 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5347 }
5348 }
5349 }
5350 }
5351
5352 // If we have "setcc X, C0", check to see if we can shrink the immediate
5353 // by changing cc.
5354 // TODO: Support this for vectors after legalize ops.
5355 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5356 // SETUGT X, SINTMAX -> SETLT X, 0
5357 // SETUGE X, SINTMIN -> SETLT X, 0
5358 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5359 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5360 return DAG.getSetCC(dl, VT, N0,
5361 DAG.getConstant(0, dl, N1.getValueType()),
5362 ISD::SETLT);
5363
5364 // SETULT X, SINTMIN -> SETGT X, -1
5365 // SETULE X, SINTMAX -> SETGT X, -1
5366 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5367 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5368 return DAG.getSetCC(dl, VT, N0,
5369 DAG.getAllOnesConstant(dl, N1.getValueType()),
5370 ISD::SETGT);
5371 }
5372 }
5373
5374 // Back to non-vector simplifications.
5375 // TODO: Can we do these for vector splats?
5376 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5377 const APInt &C1 = N1C->getAPIntValue();
5378 EVT ShValTy = N0.getValueType();
5379
5380 // Fold bit comparisons when we can. This will result in an
5381 // incorrect value when boolean false is negative one, unless
5382 // the bitsize is 1 in which case the false value is the same
5383 // in practice regardless of the representation.
5384 if ((VT.getSizeInBits() == 1 ||
5386 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5387 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5388 N0.getOpcode() == ISD::AND) {
5389 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5390 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5391 // Perform the xform if the AND RHS is a single bit.
5392 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5393 if (AndRHS->getAPIntValue().isPowerOf2() &&
5394 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5395 return DAG.getNode(
5396 ISD::TRUNCATE, dl, VT,
5397 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5398 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5399 }
5400 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5401 // (X & 8) == 8 --> (X & 8) >> 3
5402 // Perform the xform if C1 is a single bit.
5403 unsigned ShCt = C1.logBase2();
5404 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5405 return DAG.getNode(
5406 ISD::TRUNCATE, dl, VT,
5407 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5408 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5409 }
5410 }
5411 }
5412 }
5413
5414 if (C1.getSignificantBits() <= 64 &&
5416 // (X & -256) == 256 -> (X >> 8) == 1
5417 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5418 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5419 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5420 const APInt &AndRHSC = AndRHS->getAPIntValue();
5421 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5422 unsigned ShiftBits = AndRHSC.countr_zero();
5423 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5424 // If using an unsigned shift doesn't yield a legal compare
5425 // immediate, try using sra instead.
5426 APInt NewC = C1.lshr(ShiftBits);
5427 if (NewC.getSignificantBits() <= 64 &&
5429 APInt SignedC = C1.ashr(ShiftBits);
5430 if (SignedC.getSignificantBits() <= 64 &&
5432 SDValue Shift = DAG.getNode(
5433 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5434 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5435 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5436 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5437 }
5438 }
5439 SDValue Shift = DAG.getNode(
5440 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5441 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5442 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5443 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5444 }
5445 }
5446 }
5447 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5448 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5449 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5450 // X < 0x100000000 -> (X >> 32) < 1
5451 // X >= 0x100000000 -> (X >> 32) >= 1
5452 // X <= 0x0ffffffff -> (X >> 32) < 1
5453 // X > 0x0ffffffff -> (X >> 32) >= 1
5454 unsigned ShiftBits;
5455 APInt NewC = C1;
5456 ISD::CondCode NewCond = Cond;
5457 if (AdjOne) {
5458 ShiftBits = C1.countr_one();
5459 NewC = NewC + 1;
5460 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5461 } else {
5462 ShiftBits = C1.countr_zero();
5463 }
5464 NewC.lshrInPlace(ShiftBits);
5465 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5467 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5468 SDValue Shift =
5469 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5470 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5471 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5472 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5473 }
5474 }
5475 }
5476 }
5477
5479 auto *CFP = cast<ConstantFPSDNode>(N1);
5480 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5481
5482 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5483 // constant if knowing that the operand is non-nan is enough. We prefer to
5484 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5485 // materialize 0.0.
5486 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5487 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5488
5489 // setcc (fneg x), C -> setcc swap(pred) x, -C
5490 if (N0.getOpcode() == ISD::FNEG) {
5492 if (DCI.isBeforeLegalizeOps() ||
5493 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5494 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5495 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5496 }
5497 }
5498
5499 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5501 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5502 bool IsFabs = N0.getOpcode() == ISD::FABS;
5503 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5504 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5505 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5506 : (IsFabs ? fcInf : fcPosInf);
5507 if (Cond == ISD::SETUEQ)
5508 Flag |= fcNan;
5509 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5510 DAG.getTargetConstant(Flag, dl, MVT::i32));
5511 }
5512 }
5513
5514 // If the condition is not legal, see if we can find an equivalent one
5515 // which is legal.
5517 // If the comparison was an awkward floating-point == or != and one of
5518 // the comparison operands is infinity or negative infinity, convert the
5519 // condition to a less-awkward <= or >=.
5520 if (CFP->getValueAPF().isInfinity()) {
5521 bool IsNegInf = CFP->getValueAPF().isNegative();
5523 switch (Cond) {
5524 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5525 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5526 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5527 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5528 default: break;
5529 }
5530 if (NewCond != ISD::SETCC_INVALID &&
5531 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5532 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5533 }
5534 }
5535 }
5536
5537 if (N0 == N1) {
5538 // The sext(setcc()) => setcc() optimization relies on the appropriate
5539 // constant being emitted.
5540 assert(!N0.getValueType().isInteger() &&
5541 "Integer types should be handled by FoldSetCC");
5542
5543 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5544 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5545 if (UOF == 2) // FP operators that are undefined on NaNs.
5546 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5547 if (UOF == unsigned(EqTrue))
5548 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5549 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5550 // if it is not already.
5551 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5552 if (NewCond != Cond &&
5553 (DCI.isBeforeLegalizeOps() ||
5554 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5555 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5556 }
5557
5558 // ~X > ~Y --> Y > X
5559 // ~X < ~Y --> Y < X
5560 // ~X < C --> X > ~C
5561 // ~X > C --> X < ~C
5562 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5563 N0.getValueType().isInteger()) {
5564 if (isBitwiseNot(N0)) {
5565 if (isBitwiseNot(N1))
5566 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5567
5570 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5571 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5572 }
5573 }
5574 }
5575
5576 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5577 N0.getValueType().isInteger()) {
5578 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5579 N0.getOpcode() == ISD::XOR) {
5580 // Simplify (X+Y) == (X+Z) --> Y == Z
5581 if (N0.getOpcode() == N1.getOpcode()) {
5582 if (N0.getOperand(0) == N1.getOperand(0))
5583 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5584 if (N0.getOperand(1) == N1.getOperand(1))
5585 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5586 if (isCommutativeBinOp(N0.getOpcode())) {
5587 // If X op Y == Y op X, try other combinations.
5588 if (N0.getOperand(0) == N1.getOperand(1))
5589 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5590 Cond);
5591 if (N0.getOperand(1) == N1.getOperand(0))
5592 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5593 Cond);
5594 }
5595 }
5596
5597 // If RHS is a legal immediate value for a compare instruction, we need
5598 // to be careful about increasing register pressure needlessly.
5599 bool LegalRHSImm = false;
5600
5601 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5602 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5603 // Turn (X+C1) == C2 --> X == C2-C1
5604 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5605 return DAG.getSetCC(
5606 dl, VT, N0.getOperand(0),
5607 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5608 dl, N0.getValueType()),
5609 Cond);
5610
5611 // Turn (X^C1) == C2 --> X == C1^C2
5612 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5613 return DAG.getSetCC(
5614 dl, VT, N0.getOperand(0),
5615 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5616 dl, N0.getValueType()),
5617 Cond);
5618 }
5619
5620 // Turn (C1-X) == C2 --> X == C1-C2
5621 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5622 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5623 return DAG.getSetCC(
5624 dl, VT, N0.getOperand(1),
5625 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5626 dl, N0.getValueType()),
5627 Cond);
5628
5629 // Could RHSC fold directly into a compare?
5630 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5631 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5632 }
5633
5634 // (X+Y) == X --> Y == 0 and similar folds.
5635 // Don't do this if X is an immediate that can fold into a cmp
5636 // instruction and X+Y has other uses. It could be an induction variable
5637 // chain, and the transform would increase register pressure.
5638 if (!LegalRHSImm || N0.hasOneUse())
5639 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5640 return V;
5641 }
5642
5643 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5644 N1.getOpcode() == ISD::XOR)
5645 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5646 return V;
5647
5648 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5649 return V;
5650
5651 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5652 return V;
5653 }
5654
5655 // Fold remainder of division by a constant.
5656 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5657 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5658 // When division is cheap or optimizing for minimum size,
5659 // fall through to DIVREM creation by skipping this fold.
5660 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5661 if (N0.getOpcode() == ISD::UREM) {
5662 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5663 return Folded;
5664 } else if (N0.getOpcode() == ISD::SREM) {
5665 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5666 return Folded;
5667 }
5668 }
5669 }
5670
5671 // Fold away ALL boolean setcc's.
5672 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5673 SDValue Temp;
5674 switch (Cond) {
5675 default: llvm_unreachable("Unknown integer setcc!");
5676 case ISD::SETEQ: // X == Y -> ~(X^Y)
5677 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5678 N0 = DAG.getNOT(dl, Temp, OpVT);
5679 if (!DCI.isCalledByLegalizer())
5680 DCI.AddToWorklist(Temp.getNode());
5681 break;
5682 case ISD::SETNE: // X != Y --> (X^Y)
5683 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5684 break;
5685 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5686 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5687 Temp = DAG.getNOT(dl, N0, OpVT);
5688 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5689 if (!DCI.isCalledByLegalizer())
5690 DCI.AddToWorklist(Temp.getNode());
5691 break;
5692 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5693 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5694 Temp = DAG.getNOT(dl, N1, OpVT);
5695 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5696 if (!DCI.isCalledByLegalizer())
5697 DCI.AddToWorklist(Temp.getNode());
5698 break;
5699 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5700 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5701 Temp = DAG.getNOT(dl, N0, OpVT);
5702 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5703 if (!DCI.isCalledByLegalizer())
5704 DCI.AddToWorklist(Temp.getNode());
5705 break;
5706 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5707 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5708 Temp = DAG.getNOT(dl, N1, OpVT);
5709 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5710 break;
5711 }
5712 if (VT.getScalarType() != MVT::i1) {
5713 if (!DCI.isCalledByLegalizer())
5714 DCI.AddToWorklist(N0.getNode());
5715 // FIXME: If running after legalize, we probably can't do this.
5717 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5718 }
5719 return N0;
5720 }
5721
5722 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5723 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5724 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5726 N1->getFlags().hasNoUnsignedWrap()) ||
5728 N1->getFlags().hasNoSignedWrap())) &&
5730 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5731 }
5732
5733 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5734 // TODO: Remove that .isVector() check
5735 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5737 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5738 }
5739
5740 // Could not fold it.
5741 return SDValue();
5742}
5743
5744/// Returns true (and the GlobalValue and the offset) if the node is a
5745/// GlobalAddress + offset.
// NOTE(review): the opening line of this signature (the SDNode* and the
// GlobalValue*& out-parameter, presumably `WN` and `GA`) is missing from
// this listing — confirm against the upstream file.
5747 int64_t &Offset) const {
5748
// Look through any target-specific address-wrapper node first.
5749 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5750
// Base case: the node itself is a GlobalAddress; report it and fold its
// built-in offset into the running total.
5751 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5752 GA = GASD->getGlobal();
5753 Offset += GASD->getOffset();
5754 return true;
5755 }
5756
// Recursive case: match (GA + C) or (C + GA), accumulating the constant
// side into Offset. The recursion lets chains like ((GA + C) + C) match.
5757 if (N->isAnyAdd()) {
5758 SDValue N1 = N->getOperand(0);
5759 SDValue N2 = N->getOperand(1);
5760 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5761 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5762 Offset += V->getSExtValue();
5763 return true;
5764 }
5765 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5766 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5767 Offset += V->getSExtValue();
5768 return true;
5769 }
5770 }
5771 }
5772
5773 return false;
5774}
5775
// Default target hook for DAG combining; targets override this to fold
// their custom nodes. Returning an empty SDValue means "no change".
// NOTE(review): the first line of this signature (the SDNode* parameter)
// is missing from this listing.
5777 DAGCombinerInfo &DCI) const {
5778 // Default implementation: no optimization.
5779 return SDValue();
5780}
5781
5782//===----------------------------------------------------------------------===//
5783// Inline Assembler Implementation Methods
5784//===----------------------------------------------------------------------===//
5785
// Classify a single inline-asm constraint string into a ConstraintType.
// NOTE(review): the signature line(s) for this function are missing from
// this listing — presumably `TargetLowering::getConstraintType(StringRef
// Constraint)`; confirm against the upstream file.
5788 unsigned S = Constraint.size();
5789
// Single-letter constraints map directly to the GCC constraint letters.
5790 if (S == 1) {
5791 switch (Constraint[0]) {
5792 default: break;
5793 case 'r':
5794 return C_RegisterClass;
5795 case 'm': // memory
5796 case 'o': // offsetable
5797 case 'V': // not offsetable
5798 return C_Memory;
5799 case 'p': // Address.
5800 return C_Address;
5801 case 'n': // Simple Integer
5802 case 'E': // Floating Point Constant
5803 case 'F': // Floating Point Constant
5804 return C_Immediate;
5805 case 'i': // Simple Integer or Relocatable Constant
5806 case 's': // Relocatable Constant
5807 case 'X': // Allow ANY value.
5808 case 'I': // Target registers.
5809 case 'J':
5810 case 'K':
5811 case 'L':
5812 case 'M':
5813 case 'N':
5814 case 'O':
5815 case 'P':
5816 case '<':
5817 case '>':
5818 return C_Other;
5819 }
5820 }
5821
// Brace-enclosed strings name a specific physical register, except the
// special "{memory}" spelling which is treated as a memory constraint.
5822 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5823 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5824 return C_Memory;
5825 return C_Register;
5826 }
5827 return C_Unknown;
5828}
5829
5830/// Try to replace an X constraint, which matches anything, with another that
5831/// has more specific requirements based on the type of the corresponding
5832/// operand.
5833const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5834 if (ConstraintVT.isInteger())
5835 return "r";
5836 if (ConstraintVT.isFloatingPoint())
5837 return "f"; // works for many targets
5838 return nullptr;
5839}
5840
// Default hook for lowering an inline-asm output operand that produces a
// glue/flag result; targets override it. An empty SDValue means "no
// special lowering".
// NOTE(review): the opening line of this signature (return type, class
// qualifier, and first parameter) is missing from this listing —
// presumably `LowerAsmOutputForConstraint`; confirm upstream.
5842 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5843 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5844 return SDValue();
5845}
5846
5847/// Lower the specified operand into the Ops vector.
5848/// If it is invalid, don't add anything to Ops.
// NOTE(review): the first line of this signature (presumably
// `void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,`) is
// missing from this listing.
5850 StringRef Constraint,
5851 std::vector<SDValue> &Ops,
5852 SelectionDAG &DAG) const {
5853
// Only single-letter constraints are handled by this default
// implementation.
5854 if (Constraint.size() > 1)
5855 return;
5856
5857 char ConstraintLetter = Constraint[0];
5858 switch (ConstraintLetter) {
5859 default: break;
5860 case 'X': // Allows any operand
5861 case 'i': // Simple Integer or Relocatable Constant
5862 case 'n': // Simple Integer
5863 case 's': { // Relocatable Constant
5864
// NOTE(review): a declaration line is missing here (presumably
// `ConstantSDNode *C = nullptr;`, used by the loop below).
5866 uint64_t Offset = 0;
5867
5868 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5869 // etc., since getelementptr is variadic. We can't use
5870 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5871 // while in this case the GA may be furthest from the root node which is
5872 // likely an ISD::ADD.
5873 while (true) {
5874 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5875 // gcc prints these as sign extended. Sign extend value to 64 bits
5876 // now; without this it would get ZExt'd later in
5877 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5878 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5879 BooleanContent BCont = getBooleanContents(MVT::i64);
5880 ISD::NodeType ExtOpc =
5881 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5882 int64_t ExtVal =
5883 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5884 Ops.push_back(
5885 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5886 return;
5887 }
// 'n' only accepts plain integers; symbolic operands are allowed for the
// other letters.
5888 if (ConstraintLetter != 'n') {
5889 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5890 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5891 GA->getValueType(0),
5892 Offset + GA->getOffset()));
5893 return;
5894 }
5895 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5896 Ops.push_back(DAG.getTargetBlockAddress(
5897 BA->getBlockAddress(), BA->getValueType(0),
5898 Offset + BA->getOffset(), BA->getTargetFlags()));
5899 return;
5900 }
// NOTE(review): the condition guarding this fallback push_back is missing
// from this listing — confirm against the upstream file.
5902 Ops.push_back(Op);
5903 return;
5904 }
5905 }
// Peel one (op +/- constant) layer and keep walking toward the symbol.
5906 const unsigned OpCode = Op.getOpcode();
5907 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5908 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5909 Op = Op.getOperand(1);
5910 // Subtraction is not commutative.
5911 else if (OpCode == ISD::ADD &&
5912 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5913 Op = Op.getOperand(0);
5914 else
5915 return;
5916 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5917 continue;
5918 }
5919 return;
5920 }
5921 break;
5922 }
5923 }
5924}
5925
5929
// Resolve a "{regname}" constraint to a (physical register, register
// class) pair. Returns {0, nullptr} if the constraint is not brace
// enclosed or the name matches no register.
5930std::pair<unsigned, const TargetRegisterClass *>
// NOTE(review): the line naming this function (presumably
// `TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo
// *RI,`) is missing from this listing.
5932 StringRef Constraint,
5933 MVT VT) const {
5934 if (!Constraint.starts_with("{"))
5935 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5936 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5937
5938 // Remove the braces from around the name.
5939 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5940
// R holds the first (class-legal) match found, used as a fallback when no
// class is legal for the requested VT.
5941 std::pair<unsigned, const TargetRegisterClass *> R =
5942 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5943
5944 // Figure out which register class contains this reg.
5945 for (const TargetRegisterClass *RC : RI->regclasses()) {
5946 // If none of the value types for this register class are valid, we
5947 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5948 if (!isLegalRC(*RI, *RC))
5949 continue;
5950
// Register assembly names are compared case-insensitively.
5951 for (const MCPhysReg &PR : *RC) {
5952 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5953 std::pair<unsigned, const TargetRegisterClass *> S =
5954 std::make_pair(PR, RC);
5955
5956 // If this register class has the requested value type, return it,
5957 // otherwise keep searching and return the first class found
5958 // if no other is found which explicitly has the requested type.
5959 if (RI->isTypeLegalForClass(*RC, VT))
5960 return S;
5961 if (!R.second)
5962 R = S;
5963 }
5964 }
5965 }
5966
5967 return R;
5968}
5969
5970//===----------------------------------------------------------------------===//
5971// Constraint Selection.
5972
5973/// Return true if this is an input operand that is a matching constraint
5974/// like "4".
// NOTE(review): the signature line for this method is missing from this
// listing (presumably
// `bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const`).
5976 assert(!ConstraintCode.empty() && "No known constraint!");
// Matching constraints are spelled as a decimal operand number, so a
// leading digit identifies them.
5977 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5978}
5979
5980/// If this is an input matching constraint, this method returns the output
5981/// operand it matches.
// NOTE(review): the signature line for this method is missing from this
// listing (presumably
// `unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const`).
5983 assert(!ConstraintCode.empty() && "No known constraint!");
// The constraint code is the decimal index of the matched output operand.
5984 return atoi(ConstraintCode.c_str());
5985}
5986
5987/// Split up the constraint string from the inline assembly value into the
5988/// specific constraints and their prefixes, and also tie in the associated
5989/// operand values.
5990/// If this returns an empty vector, and if the constraint string itself
5991/// isn't empty, there was an error parsing.
// NOTE(review): the first line(s) of this signature are missing from this
// listing (presumably `TargetLowering::AsmOperandInfoVector
// TargetLowering::ParseConstraints(const DataLayout &DL,`).
5994 const TargetRegisterInfo *TRI,
5995 const CallBase &Call) const {
5996 /// Information about all of the constraints.
5997 AsmOperandInfoVector ConstraintOperands;
5998 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5999 unsigned maCount = 0; // Largest number of multiple alternative constraints.
6000
6001 // Do a prepass over the constraints, canonicalizing them, and building up the
6002 // ConstraintOperands list.
6003 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
6004 unsigned ResNo = 0; // ResNo - The result number of the next output.
6005 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
6006
6007 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
6008 ConstraintOperands.emplace_back(std::move(CI));
6009 AsmOperandInfo &OpInfo = ConstraintOperands.back();
6010
6011 // Update multiple alternative constraint count.
6012 if (OpInfo.multipleAlternatives.size() > maCount)
6013 maCount = OpInfo.multipleAlternatives.size();
6014
6015 OpInfo.ConstraintVT = MVT::Other;
6016
6017 // Compute the value type for each operand.
6018 switch (OpInfo.Type) {
6019 case InlineAsm::isOutput: {
6020 // Indirect outputs just consume an argument.
6021 if (OpInfo.isIndirect) {
6022 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6023 break;
6024 }
6025
6026 // The return value of the call is this value. As such, there is no
6027 // corresponding argument.
6028 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6029 EVT VT;
6030 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
6031 VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
6032 } else {
6033 assert(ResNo == 0 && "Asm only has one result!");
6034 VT = getAsmOperandValueType(DL, Call.getType());
6035 }
6036 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6037 ++ResNo;
6038 break;
6039 }
6040 case InlineAsm::isInput:
6041 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6042 break;
6043 case InlineAsm::isLabel:
6044 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
6045 ++LabelNo;
6046 continue;
// NOTE(review): a case label is missing here in this listing (presumably
// `case InlineAsm::isClobber:`) — confirm against the upstream file.
6048 // Nothing to do.
6049 break;
6050 }
6051
// Compute the constraint VT from the operand's IR type.
6052 if (OpInfo.CallOperandVal) {
6053 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6054 if (OpInfo.isIndirect) {
6055 OpTy = Call.getParamElementType(ArgNo);
6056 assert(OpTy && "Indirect operand must have elementtype attribute");
6057 }
6058
6059 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6060 if (StructType *STy = dyn_cast<StructType>(OpTy))
6061 if (STy->getNumElements() == 1)
6062 OpTy = STy->getElementType(0);
6063
6064 // If OpTy is not a single value, it may be a struct/union that we
6065 // can tile with integers.
6066 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6067 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6068 switch (BitSize) {
6069 default: break;
6070 case 1:
6071 case 8:
6072 case 16:
6073 case 32:
6074 case 64:
6075 case 128:
6076 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6077 break;
6078 }
6079 }
6080
6081 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6082 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6083 ArgNo++;
6084 }
6085 }
6086
6087 // If we have multiple alternative constraints, select the best alternative.
6088 if (!ConstraintOperands.empty()) {
6089 if (maCount) {
6090 unsigned bestMAIndex = 0;
6091 int bestWeight = -1;
6092 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6093 int weight = -1;
6094 unsigned maIndex;
6095 // Compute the sums of the weights for each alternative, keeping track
6096 // of the best (highest weight) one so far.
6097 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6098 int weightSum = 0;
6099 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6100 cIndex != eIndex; ++cIndex) {
6101 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6102 if (OpInfo.Type == InlineAsm::isClobber)
6103 continue;
6104
6105 // If this is an output operand with a matching input operand,
6106 // look up the matching input. If their types mismatch, e.g. one
6107 // is an integer, the other is floating point, or their sizes are
6108 // different, flag it as an maCantMatch.
6109 if (OpInfo.hasMatchingInput()) {
6110 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6111 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6112 if ((OpInfo.ConstraintVT.isInteger() !=
6113 Input.ConstraintVT.isInteger()) ||
6114 (OpInfo.ConstraintVT.getSizeInBits() !=
6115 Input.ConstraintVT.getSizeInBits())) {
6116 weightSum = -1; // Can't match.
6117 break;
6118 }
6119 }
6120 }
6121 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6122 if (weight == -1) {
6123 weightSum = -1;
6124 break;
6125 }
6126 weightSum += weight;
6127 }
6128 // Update best.
6129 if (weightSum > bestWeight) {
6130 bestWeight = weightSum;
6131 bestMAIndex = maIndex;
6132 }
6133 }
6134
6135 // Now select chosen alternative in each constraint.
6136 for (AsmOperandInfo &cInfo : ConstraintOperands)
6137 if (cInfo.Type != InlineAsm::isClobber)
6138 cInfo.selectAlternative(bestMAIndex);
6139 }
6140 }
6141
6142 // Check and hook up tied operands, choose constraint code to use.
6143 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6144 cIndex != eIndex; ++cIndex) {
6145 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6146
6147 // If this is an output operand with a matching input operand, look up the
6148 // matching input. If their types mismatch, e.g. one is an integer, the
6149 // other is floating point, or their sizes are different, flag it as an
6150 // error.
6151 if (OpInfo.hasMatchingInput()) {
6152 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6153
6154 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6155 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6156 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6157 OpInfo.ConstraintVT);
6158 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6159 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6160 Input.ConstraintVT);
6161 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6162 OpInfo.ConstraintVT.isFloatingPoint();
6163 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6164 Input.ConstraintVT.isFloatingPoint();
6165 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6166 (MatchRC.second != InputRC.second)) {
6167 report_fatal_error("Unsupported asm: input constraint"
6168 " with a matching output constraint of"
6169 " incompatible type!");
6170 }
6171 }
6172 }
6173 }
6174
6175 return ConstraintOperands;
6176}
6177
6178/// Return a number indicating our preference for choosing a type of constraint
6179/// over another, for the purpose of sorting them. Immediates are almost always
6180/// preferable (when they can be emitted). A higher return value means a
6181/// stronger preference for one constraint type relative to another.
6182/// FIXME: We should prefer registers over memory but doing so may lead to
6183/// unrecoverable register exhaustion later.
6184/// https://github.com/llvm/llvm-project/issues/20571
// NOTE(review): the signature line and all of the `case` labels of this
// switch are missing from this listing, so the constraint type mapped to
// each return value below cannot be stated here — confirm against the
// upstream file.
6186 switch (CT) {
6189 return 4;
6192 return 3;
6194 return 2;
6196 return 1;
6198 return 0;
6199 }
6200 llvm_unreachable("Invalid constraint type");
6201}
6202
6203/// Examine constraint type and operand type and determine a weight value.
6204/// This object must already have been set up with the operand type
6205/// and the current alternative constraint selected.
// NOTE(review): the opening line(s) of this signature and the declaration
// of `rCodes` (a pointer to a constraint-code vector) are missing from
// this listing — confirm against the upstream file.
6208 AsmOperandInfo &info, int maIndex) const {
// Out-of-range alternative index falls back to the primary code list.
6210 if (maIndex >= (int)info.multipleAlternatives.size())
6211 rCodes = &info.Codes;
6212 else
6213 rCodes = &info.multipleAlternatives[maIndex].Codes;
6214 ConstraintWeight BestWeight = CW_Invalid;
6215
6216 // Loop over the options, keeping track of the most general one.
6217 for (const std::string &rCode : *rCodes) {
6218 ConstraintWeight weight =
6219 getSingleConstraintMatchWeight(info, rCode.c_str());
6220 if (weight > BestWeight)
6221 BestWeight = weight;
6222 }
6223
6224 return BestWeight;
6225}
6226
6227/// Examine constraint type and operand type and determine a weight value.
6228/// This object must already have been set up with the operand type
6229/// and the current alternative constraint selected.
// NOTE(review): the opening line(s) of this signature and the declaration
// that initializes `weight` are missing from this listing — confirm
// against the upstream file.
6232 AsmOperandInfo &info, const char *constraint) const {
6234 Value *CallOperandVal = info.CallOperandVal;
6235 // If we don't have a value, we can't do a match,
6236 // but allow it at the lowest weight.
6237 if (!CallOperandVal)
6238 return CW_Default;
6239 // Look at the constraint type.
6240 switch (*constraint) {
6241 case 'i': // immediate integer.
6242 case 'n': // immediate integer with a known value.
6243 if (isa<ConstantInt>(CallOperandVal))
6244 weight = CW_Constant;
6245 break;
6246 case 's': // non-explicit integral immediate.
6247 if (isa<GlobalValue>(CallOperandVal))
6248 weight = CW_Constant;
6249 break;
6250 case 'E': // immediate float if host format.
6251 case 'F': // immediate float.
6252 if (isa<ConstantFP>(CallOperandVal))
6253 weight = CW_Constant;
6254 break;
6255 case '<': // memory operand with autodecrement.
6256 case '>': // memory operand with autoincrement.
6257 case 'm': // memory operand.
6258 case 'o': // offsettable memory operand
6259 case 'V': // non-offsettable memory operand
6260 weight = CW_Memory;
6261 break;
6262 case 'r': // general register.
6263 case 'g': // general register, memory operand or immediate integer.
6264 // note: Clang converts "g" to "imr".
6265 if (CallOperandVal->getType()->isIntegerTy())
6266 weight = CW_Register;
6267 break;
6268 case 'X': // any operand.
6269 default:
6270 weight = CW_Default;
6271 break;
6272 }
6273 return weight;
6274}
6275
6276/// If there are multiple different constraints that we could pick for this
6277/// operand (e.g. "imr") try to pick the 'best' one.
6278/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6279/// into seven classes:
6280/// Register -> one specific register
6281/// RegisterClass -> a group of regs
6282/// Memory -> memory
6283/// Address -> a symbolic memory reference
6284/// Immediate -> immediate values
6285/// Other -> magic values (such as "Flag Output Operands")
6286/// Unknown -> something we don't recognize yet and can't handle
6287/// Ideally, we would pick the most specific constraint possible: if we have
6288/// something that fits into a register, we would pick it. The problem here
6289/// is that if we have something that could either be in a register or in
6290/// memory that use of the register could cause selection of *other*
6291/// operands to fail: they might only succeed if we pick memory. Because of
6292/// this the heuristic we use is:
6293///
6294/// 1) If there is an 'other' constraint, and if the operand is valid for
6295/// that constraint, use it. This makes us take advantage of 'i'
6296/// constraints when available.
6297/// 2) Otherwise, pick the most general constraint present. This prefers
6298/// 'm' over 'r', for example.
6299///
// NOTE(review): the signature line for this function is missing from this
// listing — confirm against the upstream file.
6301 TargetLowering::AsmOperandInfo &OpInfo) const {
6302 ConstraintGroup Ret;
6303
6304 Ret.reserve(OpInfo.Codes.size());
6305 for (StringRef Code : OpInfo.Codes) {
// NOTE(review): the line classifying `Code` into `CType` (presumably via
// getConstraintType) is missing from this listing.
6307
6308 // Indirect 'other' or 'immediate' constraints are not allowed.
6309 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6310 CType == TargetLowering::C_Register ||
// NOTE(review): a third allowed constraint type in this condition is
// missing from this listing.
6312 continue;
6313
6314 // Things with matching constraints can only be registers, per gcc
6315 // documentation. This mainly affects "g" constraints.
6316 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6317 continue;
6318
6319 Ret.emplace_back(Code, CType);
6320 }
6321
// NOTE(review): the line opening this sort call (presumably a stable sort
// over Ret with the lambda below) is missing from this listing.
6323 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6324 });
6325
6326 return Ret;
6327}
6328
6329/// If we have an immediate, see if we can lower it. Return true if we can,
6330/// false otherwise.
6332 SDValue Op, SelectionDAG *DAG,
6333 const TargetLowering &TLI) {
6334
6335 assert((P.second == TargetLowering::C_Other ||
6336 P.second == TargetLowering::C_Immediate) &&
6337 "need immediate or other");
6338
6339 if (!Op.getNode())
6340 return false;
6341
6342 std::vector<SDValue> ResultOps;
6343 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6344 return !ResultOps.empty();
6345}
6346
6347/// Determines the constraint code and constraint type to use for the specific
6348/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6350 SDValue Op,
6351 SelectionDAG *DAG) const {
6352 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6353
6354 // Single-letter constraints ('r') are very common.
6355 if (OpInfo.Codes.size() == 1) {
6356 OpInfo.ConstraintCode = OpInfo.Codes[0];
6357 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6358 } else {
6360 if (G.empty())
6361 return;
6362
6363 unsigned BestIdx = 0;
6364 for (const unsigned E = G.size();
6365 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6366 G[BestIdx].second == TargetLowering::C_Immediate);
6367 ++BestIdx) {
6368 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6369 break;
6370 // If we're out of constraints, just pick the first one.
6371 if (BestIdx + 1 == E) {
6372 BestIdx = 0;
6373 break;
6374 }
6375 }
6376
6377 OpInfo.ConstraintCode = G[BestIdx].first;
6378 OpInfo.ConstraintType = G[BestIdx].second;
6379 }
6380
6381 // 'X' matches anything.
6382 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6383 // Constants are handled elsewhere. For Functions, the type here is the
6384 // type of the result, which is not what we want to look at; leave them
6385 // alone.
6386 Value *v = OpInfo.CallOperandVal;
6387 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6388 return;
6389 }
6390
6391 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6392 OpInfo.ConstraintCode = "i";
6393 return;
6394 }
6395
6396 // Otherwise, try to resolve it to something we know about by looking at
6397 // the actual operand type.
6398 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6399 OpInfo.ConstraintCode = Repl;
6400 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6401 }
6402 }
6403}
6404
6405/// Given an exact SDIV by a constant, create a multiplication
6406/// with the multiplicative inverse of the constant.
6407/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6409 const SDLoc &dl, SelectionDAG &DAG,
6410 SmallVectorImpl<SDNode *> &Created) {
6411 SDValue Op0 = N->getOperand(0);
6412 SDValue Op1 = N->getOperand(1);
6413 EVT VT = N->getValueType(0);
6414 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6415 EVT ShSVT = ShVT.getScalarType();
6416
6417 bool UseSRA = false;
6418 SmallVector<SDValue, 16> Shifts, Factors;
6419
6420 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6421 if (C->isZero())
6422 return false;
6423
6424 EVT CT = C->getValueType(0);
6425 APInt Divisor = C->getAPIntValue();
6426 unsigned Shift = Divisor.countr_zero();
6427 if (Shift) {
6428 Divisor.ashrInPlace(Shift);
6429 UseSRA = true;
6430 }
6431 APInt Factor = Divisor.multiplicativeInverse();
6432 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6433 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6434 return true;
6435 };
6436
6437 // Collect all magic values from the build vector.
6438 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern, /*AllowUndefs=*/false,
6439 /*AllowTruncation=*/true))
6440 return SDValue();
6441
6442 SDValue Shift, Factor;
6443 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6444 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6445 Factor = DAG.getBuildVector(VT, dl, Factors);
6446 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6447 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6448 "Expected matchUnaryPredicate to return one element for scalable "
6449 "vectors");
6450 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6451 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6452 } else {
6453 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6454 Shift = Shifts[0];
6455 Factor = Factors[0];
6456 }
6457
6458 SDValue Res = Op0;
6459 if (UseSRA) {
6460 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6461 Created.push_back(Res.getNode());
6462 }
6463
6464 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6465}
6466
6467/// Given an exact UDIV by a constant, create a multiplication
6468/// with the multiplicative inverse of the constant.
6469/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6471 const SDLoc &dl, SelectionDAG &DAG,
6472 SmallVectorImpl<SDNode *> &Created) {
6473 EVT VT = N->getValueType(0);
6474 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6475 EVT ShSVT = ShVT.getScalarType();
6476
6477 bool UseSRL = false;
6478 SmallVector<SDValue, 16> Shifts, Factors;
6479
6480 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6481 if (C->isZero())
6482 return false;
6483
6484 EVT CT = C->getValueType(0);
6485 APInt Divisor = C->getAPIntValue();
6486 unsigned Shift = Divisor.countr_zero();
6487 if (Shift) {
6488 Divisor.lshrInPlace(Shift);
6489 UseSRL = true;
6490 }
6491 // Calculate the multiplicative inverse modulo BW.
6492 APInt Factor = Divisor.multiplicativeInverse();
6493 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6494 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6495 return true;
6496 };
6497
6498 SDValue Op1 = N->getOperand(1);
6499
6500 // Collect all magic values from the build vector.
6501 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern, /*AllowUndefs=*/false,
6502 /*AllowTruncation=*/true))
6503 return SDValue();
6504
6505 SDValue Shift, Factor;
6506 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6507 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6508 Factor = DAG.getBuildVector(VT, dl, Factors);
6509 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6510 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6511 "Expected matchUnaryPredicate to return one element for scalable "
6512 "vectors");
6513 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6514 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6515 } else {
6516 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6517 Shift = Shifts[0];
6518 Factor = Factors[0];
6519 }
6520
6521 SDValue Res = N->getOperand(0);
6522 if (UseSRL) {
6523 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6524 Created.push_back(Res.getNode());
6525 }
6526
6527 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6528}
6529
6531 SelectionDAG &DAG,
6532 SmallVectorImpl<SDNode *> &Created) const {
6533 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6534 if (isIntDivCheap(N->getValueType(0), Attr))
6535 return SDValue(N, 0); // Lower SDIV as SDIV
6536 return SDValue();
6537}
6538
6539SDValue
6541 SelectionDAG &DAG,
6542 SmallVectorImpl<SDNode *> &Created) const {
6543 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6544 if (isIntDivCheap(N->getValueType(0), Attr))
6545 return SDValue(N, 0); // Lower SREM as SREM
6546 return SDValue();
6547}
6548
6549/// Build sdiv by power-of-2 with conditional move instructions
6550/// Ref: "Hacker's Delight" by Henry Warren 10-1
6551/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6552/// bgez x, label
6553/// add x, x, 2**k-1
6554/// label:
6555/// sra res, x, k
6556/// neg res, res (when the divisor is negative)
6558 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6559 SmallVectorImpl<SDNode *> &Created) const {
6560 unsigned Lg2 = Divisor.countr_zero();
6561 EVT VT = N->getValueType(0);
6562
6563 SDLoc DL(N);
6564 SDValue N0 = N->getOperand(0);
6565 SDValue Zero = DAG.getConstant(0, DL, VT);
6566 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6567 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6568
6569 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6570 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6571 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6572 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6573 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6574
6575 Created.push_back(Cmp.getNode());
6576 Created.push_back(Add.getNode());
6577 Created.push_back(CMov.getNode());
6578
6579 // Divide by pow2.
6580 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6581 DAG.getShiftAmountConstant(Lg2, VT, DL));
6582
6583 // If we're dividing by a positive value, we're done. Otherwise, we must
6584 // negate the result.
6585 if (Divisor.isNonNegative())
6586 return SRA;
6587
6588 Created.push_back(SRA.getNode());
6589 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6590}
6591
6592/// Given an ISD::SDIV node expressing a divide by constant,
6593/// return a DAG expression to select that will generate the same value by
6594/// multiplying by a magic number.
6595/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6597 bool IsAfterLegalization,
6598 bool IsAfterLegalTypes,
6599 SmallVectorImpl<SDNode *> &Created) const {
6600 SDLoc dl(N);
6601
6602 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6603 if (N->getFlags().hasExact())
6604 return BuildExactSDIV(*this, N, dl, DAG, Created);
6605
6606 EVT VT = N->getValueType(0);
6607 EVT SVT = VT.getScalarType();
6608 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6609 EVT ShSVT = ShVT.getScalarType();
6610 unsigned EltBits = VT.getScalarSizeInBits();
6611 EVT MulVT;
6612
6613 // Check to see if we can do this.
6614 // FIXME: We should be more aggressive here.
6615 if (!isTypeLegal(VT)) {
6616 // Limit this to simple scalars for now.
6617 if (VT.isVector() || !VT.isSimple())
6618 return SDValue();
6619
6620 // If this type will be promoted to a large enough type with a legal
6621 // multiply operation, we can go ahead and do this transform.
6623 return SDValue();
6624
6625 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6626 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6627 !isOperationLegal(ISD::MUL, MulVT))
6628 return SDValue();
6629 }
6630
6631 bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization);
6632 bool HasSMUL_LOHI =
6633 isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization);
6634
6635 if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
6636 // If type twice as wide legal, widen and use a mul plus a shift.
6637 unsigned Size = VT.getScalarSizeInBits();
6638 EVT WideVT = VT.changeElementType(
6639 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), Size * 2));
6640 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6641 // custom lowered. This is very expensive so avoid it at all costs for
6642 // constant divisors.
6643 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6646 MulVT = WideVT;
6647 }
6648
6649 if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
6650 return SDValue();
6651
6652 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6653
6654 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6655 if (C->isZero())
6656 return false;
6657 // Truncate the divisor to the target scalar type in case it was promoted
6658 // during type legalization.
6659 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6661 int NumeratorFactor = 0;
6662 int ShiftMask = -1;
6663
6664 if (Divisor.isOne() || Divisor.isAllOnes()) {
6665 // If d is +1/-1, we just multiply the numerator by +1/-1.
6666 NumeratorFactor = Divisor.getSExtValue();
6667 magics.Magic = 0;
6668 magics.ShiftAmount = 0;
6669 ShiftMask = 0;
6670 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6671 // If d > 0 and m < 0, add the numerator.
6672 NumeratorFactor = 1;
6673 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6674 // If d < 0 and m > 0, subtract the numerator.
6675 NumeratorFactor = -1;
6676 }
6677
6678 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6679 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6680 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6681 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6682 return true;
6683 };
6684
6685 SDValue N0 = N->getOperand(0);
6686 SDValue N1 = N->getOperand(1);
6687
6688 // Collect the shifts / magic values from each element.
6689 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6690 /*AllowTruncation=*/true))
6691 return SDValue();
6692
6693 SDValue MagicFactor, Factor, Shift, ShiftMask;
6694 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6695 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6696 Factor = DAG.getBuildVector(VT, dl, Factors);
6697 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6698 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6699 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6700 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6701 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6702 "Expected matchUnaryPredicate to return one element for scalable "
6703 "vectors");
6704 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6705 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6706 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6707 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6708 } else {
6709 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6710 MagicFactor = MagicFactors[0];
6711 Factor = Factors[0];
6712 Shift = Shifts[0];
6713 ShiftMask = ShiftMasks[0];
6714 }
6715
6716 // Multiply the numerator (operand 0) by the magic value.
6717 auto GetMULHS = [&](SDValue X, SDValue Y) {
6718 if (HasMULHS)
6719 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6720 if (HasSMUL_LOHI) {
6721 SDValue LoHi =
6722 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6723 return LoHi.getValue(1);
6724 }
6725
6726 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6727 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6728 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6729 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6730 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6731 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6732 };
6733
6734 SDValue Q = GetMULHS(N0, MagicFactor);
6735 if (!Q)
6736 return SDValue();
6737
6738 Created.push_back(Q.getNode());
6739
6740 // (Optionally) Add/subtract the numerator using Factor.
6741 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6742 Created.push_back(Factor.getNode());
6743 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6744 Created.push_back(Q.getNode());
6745
6746 // Shift right algebraic by shift value.
6747 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6748 Created.push_back(Q.getNode());
6749
6750 // Extract the sign bit, mask it and add it to the quotient.
6751 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6752 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6753 Created.push_back(T.getNode());
6754 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6755 Created.push_back(T.getNode());
6756 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6757}
6758
6759/// Given an ISD::UDIV node expressing a divide by constant,
6760/// return a DAG expression to select that will generate the same value by
6761/// multiplying by a magic number.
6762/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6764 bool IsAfterLegalization,
6765 bool IsAfterLegalTypes,
6766 SmallVectorImpl<SDNode *> &Created) const {
6767 SDLoc dl(N);
6768
6769 // If the udiv has an 'exact' bit we can use a simpler lowering.
6770 if (N->getFlags().hasExact())
6771 return BuildExactUDIV(*this, N, dl, DAG, Created);
6772
6773 EVT VT = N->getValueType(0);
6774 EVT SVT = VT.getScalarType();
6775 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6776 EVT ShSVT = ShVT.getScalarType();
6777 unsigned EltBits = VT.getScalarSizeInBits();
6778 EVT MulVT;
6779
6780 // Check to see if we can do this.
6781 // FIXME: We should be more aggressive here.
6782 if (!isTypeLegal(VT)) {
6783 // Limit this to simple scalars for now.
6784 if (VT.isVector() || !VT.isSimple())
6785 return SDValue();
6786
6787 // If this type will be promoted to a large enough type with a legal
6788 // multiply operation, we can go ahead and do this transform.
6790 return SDValue();
6791
6792 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6793 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6794 !isOperationLegal(ISD::MUL, MulVT))
6795 return SDValue();
6796 }
6797
6798 bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization);
6799 bool HasUMUL_LOHI =
6800 isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization);
6801
6802 if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
6803 // If type twice as wide legal, widen and use a mul plus a shift.
6804 unsigned Size = VT.getScalarSizeInBits();
6805 EVT WideVT = VT.changeElementType(
6806 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), Size * 2));
6807 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6808 // custom lowered. This is very expensive so avoid it at all costs for
6809 // constant divisors.
6810 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6813 MulVT = WideVT;
6814 }
6815
6816 if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
6817 return SDValue();
6818
6819 SDValue N0 = N->getOperand(0);
6820 SDValue N1 = N->getOperand(1);
6821
6822 // Try to use leading zeros of the dividend to reduce the multiplier and
6823 // avoid expensive fixups.
6824 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6825
6826 // If we're after type legalization and SVT is not legal, use the
6827 // promoted type for creating constants to avoid creating nodes with
6828 // illegal types.
6829 if (IsAfterLegalTypes && VT.isVector()) {
6830 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6831 if (SVT.bitsLT(VT.getScalarType()))
6832 return SDValue();
6833 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6834 if (ShSVT.bitsLT(ShVT.getScalarType()))
6835 return SDValue();
6836 }
6837 const unsigned SVTBits = SVT.getSizeInBits();
6838
6839 // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
6840 // UMUL_LOHI is supported.
6841 const EVT WideSVT = MVT::i64;
6842 const bool HasWideMULHU =
6843 VT == MVT::i32 &&
6844 isOperationLegalOrCustom(ISD::MULHU, WideSVT, IsAfterLegalization);
6845 const bool HasWideUMUL_LOHI =
6846 VT == MVT::i32 &&
6847 isOperationLegalOrCustom(ISD::UMUL_LOHI, WideSVT, IsAfterLegalization);
6848 const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);
6849
6850 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6851 bool UseWiden = false;
6852 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6853
6854 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6855 if (C->isZero())
6856 return false;
6857 // Truncate the divisor to the target scalar type in case it was promoted
6858 // during type legalization.
6859 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6860
6861 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6862
6863 // Magic algorithm doesn't work for division by 1. We need to emit a select
6864 // at the end.
6865 if (Divisor.isOne()) {
6866 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6867 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6868 } else {
6871 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()),
6872 /*AllowEvenDivisorOptimization=*/true,
6873 /*AllowWidenOptimization=*/AllowWiden);
6874
6875 if (magics.Widen) {
6876 UseWiden = true;
6877 MagicFactor = DAG.getConstant(magics.Magic, dl, WideSVT);
6878 } else {
6879 MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
6880 }
6881
6882 assert(magics.PreShift < Divisor.getBitWidth() &&
6883 "We shouldn't generate an undefined shift!");
6884 assert(magics.PostShift < Divisor.getBitWidth() &&
6885 "We shouldn't generate an undefined shift!");
6886 assert((!magics.IsAdd || magics.PreShift == 0) &&
6887 "Unexpected pre-shift");
6888 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6889 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6890 NPQFactor = DAG.getConstant(
6891 magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6892 : APInt::getZero(SVTBits),
6893 dl, SVT);
6894 UseNPQ |= magics.IsAdd;
6895 UsePreShift |= magics.PreShift != 0;
6896 UsePostShift |= magics.PostShift != 0;
6897 }
6898
6899 PreShifts.push_back(PreShift);
6900 MagicFactors.push_back(MagicFactor);
6901 NPQFactors.push_back(NPQFactor);
6902 PostShifts.push_back(PostShift);
6903 return true;
6904 };
6905
6906 // Collect the shifts/magic values from each element.
6907 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6908 /*AllowTruncation=*/true))
6909 return SDValue();
6910
6911 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6912 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6913 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6914 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6915 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6916 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6917 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6918 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6919 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6920 "Expected matchUnaryPredicate to return one for scalable vectors");
6921 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6922 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6923 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6924 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6925 } else {
6926 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6927 PreShift = PreShifts[0];
6928 MagicFactor = MagicFactors[0];
6929 PostShift = PostShifts[0];
6930 }
6931
6932 if (UseWiden) {
6933 // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
6934 SDValue WideN0 = DAG.getNode(ISD::ZERO_EXTEND, dl, WideSVT, N0);
6935
6936 // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
6937 // WideSVT bits
6938 SDValue High;
6939 if (HasWideMULHU) {
6940 High = DAG.getNode(ISD::MULHU, dl, WideSVT, WideN0, MagicFactor);
6941 } else {
6942 assert(HasWideUMUL_LOHI);
6943 SDValue LoHi =
6944 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(WideSVT, WideSVT),
6945 WideN0, MagicFactor);
6946 High = LoHi.getValue(1);
6947 }
6948
6949 Created.push_back(High.getNode());
6950 return DAG.getNode(ISD::TRUNCATE, dl, VT, High);
6951 }
6952
6953 SDValue Q = N0;
6954 if (UsePreShift) {
6955 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6956 Created.push_back(Q.getNode());
6957 }
6958
6959 auto GetMULHU = [&](SDValue X, SDValue Y) {
6960 if (HasMULHU)
6961 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6962 if (HasUMUL_LOHI) {
6963 SDValue LoHi =
6964 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6965 return LoHi.getValue(1);
6966 }
6967
6968 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6969 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6970 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6971 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6972 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6973 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6974 };
6975
6976 // Multiply the numerator (operand 0) by the magic value.
6977 Q = GetMULHU(Q, MagicFactor);
6978 if (!Q)
6979 return SDValue();
6980
6981 Created.push_back(Q.getNode());
6982
6983 if (UseNPQ) {
6984 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6985 Created.push_back(NPQ.getNode());
6986
6987 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6988 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6989 if (VT.isVector())
6990 NPQ = GetMULHU(NPQ, NPQFactor);
6991 else
6992 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6993
6994 Created.push_back(NPQ.getNode());
6995
6996 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6997 Created.push_back(Q.getNode());
6998 }
6999
7000 if (UsePostShift) {
7001 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
7002 Created.push_back(Q.getNode());
7003 }
7004
7005 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7006
7007 SDValue One = DAG.getConstant(1, dl, VT);
7008 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
7009 return DAG.getSelect(dl, VT, IsOne, N0, Q);
7010}
7011
7012/// If all values in Values that *don't* match the predicate are same 'splat'
7013/// value, then replace all values with that splat value.
7014/// Else, if AlternativeReplacement was provided, then replace all values that
7015/// do match predicate with AlternativeReplacement value.
7016static void
7018 std::function<bool(SDValue)> Predicate,
7019 SDValue AlternativeReplacement = SDValue()) {
7020 SDValue Replacement;
7021 // Is there a value for which the Predicate does *NOT* match? What is it?
7022 auto SplatValue = llvm::find_if_not(Values, Predicate);
7023 if (SplatValue != Values.end()) {
7024 // Does Values consist only of SplatValue's and values matching Predicate?
7025 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
7026 return Value == *SplatValue || Predicate(Value);
7027 })) // Then we shall replace values matching predicate with SplatValue.
7028 Replacement = *SplatValue;
7029 }
7030 if (!Replacement) {
7031 // Oops, we did not find the "baseline" splat value.
7032 if (!AlternativeReplacement)
7033 return; // Nothing to do.
7034 // Let's replace with provided value then.
7035 Replacement = AlternativeReplacement;
7036 }
7037 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
7038}
7039
7040/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7041/// where the divisor is constant and the comparison target is zero,
7042/// return a DAG expression that will generate the same comparison result
7043/// using only multiplications, additions and shifts/rotations.
7044/// Ref: "Hacker's Delight" 10-17.
7045SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7046 SDValue CompTargetNode,
7048 DAGCombinerInfo &DCI,
7049 const SDLoc &DL) const {
7051 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7052 DCI, DL, Built)) {
7053 for (SDNode *N : Built)
7054 DCI.AddToWorklist(N);
7055 return Folded;
7056 }
7057
7058 return SDValue();
7059}
7060
7061SDValue
7062TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7063 SDValue CompTargetNode, ISD::CondCode Cond,
7064 DAGCombinerInfo &DCI, const SDLoc &DL,
7065 SmallVectorImpl<SDNode *> &Created) const {
7066 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
7067 // - D must be constant, with D = D0 * 2^K where D0 is odd
7068 // - P is the multiplicative inverse of D0 modulo 2^W
7069 // - Q = floor(((2^W) - 1) / D)
7070 // where W is the width of the common type of N and D.
7071 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7072 "Only applicable for (in)equality comparisons.");
7073
7074 SelectionDAG &DAG = DCI.DAG;
7075
7076 EVT VT = REMNode.getValueType();
7077 EVT SVT = VT.getScalarType();
7078 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7079 EVT ShSVT = ShVT.getScalarType();
7080
7081 // If MUL is unavailable, we cannot proceed in any case.
7082 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7083 return SDValue();
7084
7085 bool ComparingWithAllZeros = true;
7086 bool AllComparisonsWithNonZerosAreTautological = true;
7087 bool HadTautologicalLanes = false;
7088 bool AllLanesAreTautological = true;
7089 bool HadEvenDivisor = false;
7090 bool AllDivisorsArePowerOfTwo = true;
7091 bool HadTautologicalInvertedLanes = false;
7092 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7093
7094 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7095 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7096 if (CDiv->isZero())
7097 return false;
7098
7099 const APInt &D = CDiv->getAPIntValue();
7100 const APInt &Cmp = CCmp->getAPIntValue();
7101
7102 ComparingWithAllZeros &= Cmp.isZero();
7103
7104 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7105 // if C2 is not less than C1, the comparison is always false.
7106 // But we will only be able to produce the comparison that will give the
7107 // opposive tautological answer. So this lane would need to be fixed up.
7108 bool TautologicalInvertedLane = D.ule(Cmp);
7109 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7110
7111 // If all lanes are tautological (either all divisors are ones, or divisor
7112 // is not greater than the constant we are comparing with),
7113 // we will prefer to avoid the fold.
7114 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7115 HadTautologicalLanes |= TautologicalLane;
7116 AllLanesAreTautological &= TautologicalLane;
7117
7118 // If we are comparing with non-zero, we need'll need to subtract said
7119 // comparison value from the LHS. But there is no point in doing that if
7120 // every lane where we are comparing with non-zero is tautological..
7121 if (!Cmp.isZero())
7122 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7123
7124 // Decompose D into D0 * 2^K
7125 unsigned K = D.countr_zero();
7126 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7127 APInt D0 = D.lshr(K);
7128
7129 // D is even if it has trailing zeros.
7130 HadEvenDivisor |= (K != 0);
7131 // D is a power-of-two if D0 is one.
7132 // If all divisors are power-of-two, we will prefer to avoid the fold.
7133 AllDivisorsArePowerOfTwo &= D0.isOne();
7134
7135 // P = inv(D0, 2^W)
7136 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7137 unsigned W = D.getBitWidth();
7138 APInt P = D0.multiplicativeInverse();
7139 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7140
7141 // Q = floor((2^W - 1) u/ D)
7142 // R = ((2^W - 1) u% D)
7143 APInt Q, R;
7145
7146 // If we are comparing with zero, then that comparison constant is okay,
7147 // else it may need to be one less than that.
7148 if (Cmp.ugt(R))
7149 Q -= 1;
7150
7152 "We are expecting that K is always less than all-ones for ShSVT");
7153
7154 // If the lane is tautological the result can be constant-folded.
7155 if (TautologicalLane) {
7156 // Set P and K amount to a bogus values so we can try to splat them.
7157 P = 0;
7158 K = -1;
7159 // And ensure that comparison constant is tautological,
7160 // it will always compare true/false.
7161 Q = -1;
7162 }
7163
7164 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7165 KAmts.push_back(
7166 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7167 /*implicitTrunc=*/true),
7168 DL, ShSVT));
7169 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7170 return true;
7171 };
7172
7173 SDValue N = REMNode.getOperand(0);
7174 SDValue D = REMNode.getOperand(1);
7175
7176 // Collect the values from each element.
7177 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7178 return SDValue();
7179
7180 // If all lanes are tautological, the result can be constant-folded.
7181 if (AllLanesAreTautological)
7182 return SDValue();
7183
7184 // If this is a urem by a powers-of-two, avoid the fold since it can be
7185 // best implemented as a bit test.
7186 if (AllDivisorsArePowerOfTwo)
7187 return SDValue();
7188
7189 SDValue PVal, KVal, QVal;
7190 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7191 if (HadTautologicalLanes) {
7192 // Try to turn PAmts into a splat, since we don't care about the values
7193 // that are currently '0'. If we can't, just keep '0'`s.
7195 // Try to turn KAmts into a splat, since we don't care about the values
7196 // that are currently '-1'. If we can't, change them to '0'`s.
7198 DAG.getConstant(0, DL, ShSVT));
7199 }
7200
7201 PVal = DAG.getBuildVector(VT, DL, PAmts);
7202 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7203 QVal = DAG.getBuildVector(VT, DL, QAmts);
7204 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7205 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7206 "Expected matchBinaryPredicate to return one element for "
7207 "SPLAT_VECTORs");
7208 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7209 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7210 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7211 } else {
7212 PVal = PAmts[0];
7213 KVal = KAmts[0];
7214 QVal = QAmts[0];
7215 }
7216
7217 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7218 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7219 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7220 assert(CompTargetNode.getValueType() == N.getValueType() &&
7221 "Expecting that the types on LHS and RHS of comparisons match.");
7222 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7223 }
7224
7225 // (mul N, P)
7226 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7227 Created.push_back(Op0.getNode());
7228
7229 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7230 // divisors as a performance improvement, since rotating by 0 is a no-op.
7231 if (HadEvenDivisor) {
7232 // We need ROTR to do this.
7233 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7234 return SDValue();
7235 // UREM: (rotr (mul N, P), K)
7236 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7237 Created.push_back(Op0.getNode());
7238 }
7239
7240 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7241 SDValue NewCC =
7242 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7244 if (!HadTautologicalInvertedLanes)
7245 return NewCC;
7246
7247 // If any lanes previously compared always-false, the NewCC will give
7248 // always-true result for them, so we need to fixup those lanes.
7249 // Or the other way around for inequality predicate.
7250 assert(VT.isVector() && "Can/should only get here for vectors.");
7251 Created.push_back(NewCC.getNode());
7252
7253 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7254 // if C2 is not less than C1, the comparison is always false.
7255 // But we have produced the comparison that will give the
7256 // opposive tautological answer. So these lanes would need to be fixed up.
7257 SDValue TautologicalInvertedChannels =
7258 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7259 Created.push_back(TautologicalInvertedChannels.getNode());
7260
7261 // NOTE: we avoid letting illegal types through even if we're before legalize
7262 // ops – legalization has a hard time producing good code for this.
7263 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7264 // If we have a vector select, let's replace the comparison results in the
7265 // affected lanes with the correct tautological result.
7266 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7267 DL, SETCCVT, SETCCVT);
7268 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7269 Replacement, NewCC);
7270 }
7271
7272 // Else, we can just invert the comparison result in the appropriate lanes.
7273 //
7274 // NOTE: see the note above VSELECT above.
7275 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7276 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7277 TautologicalInvertedChannels);
7278
7279 return SDValue(); // Don't know how to lower.
7280}
7281
7282/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7283/// where the divisor is constant and the comparison target is zero,
7284/// return a DAG expression that will generate the same comparison result
7285/// using only multiplications, additions and shifts/rotations.
7286/// Ref: "Hacker's Delight" 10-17.
// Public entry point for the SREM-by-constant equality fold: tries the
// rewrite in prepareSREMEqFold() and, on success, registers every node the
// fold created with the DAGCombiner worklist before returning the result.
// NOTE(review): this listing is a doxygen text extraction; original lines
// 7289 and 7292 were dropped — presumably the `ISD::CondCode Cond` parameter
// and the `SmallVector<SDNode *, 7> Built;` declaration. Confirm against the
// upstream file before relying on this listing.
7287SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7288 SDValue CompTargetNode,
7290 DAGCombinerInfo &DCI,
7291 const SDLoc &DL) const {
7293 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7294 DCI, DL, Built)) {
// The "7" mirrors the predicted inline capacity of the Built small-vector.
7295 assert(Built.size() <= 7 && "Max size prediction failed.");
7296 for (SDNode *N : Built)
7297 DCI.AddToWorklist(N);
7298 return Folded;
7299 }
7300
// No legal/profitable fold was found.
7301 return SDValue();
7302}
7303
// Rewrites (seteq/setne (srem N, D), 0) into a mul(+add)(+rotr)+setcc
// sequence, per the Hacker's Delight 10-17 derivation documented below.
// Negative divisors are canonicalized to positive; INT_MIN divisor lanes are
// handled by a post-fix blend at the end.
// NOTE(review): doxygen extraction dropped lines made purely of hyperlinked
// identifiers (e.g. 7410-7411/7413 assert conditions, 7419, 7527, 7542-7544,
// 7550-7555). Restore those from upstream before treating this as compilable.
7304SDValue
7305TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7306 SDValue CompTargetNode, ISD::CondCode Cond,
7307 DAGCombinerInfo &DCI, const SDLoc &DL,
7308 SmallVectorImpl<SDNode *> &Created) const {
7309 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7310 // Fold:
7311 // (seteq/ne (srem N, D), 0)
7312 // To:
7313 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7314 //
7315 // - D must be constant, with D = D0 * 2^K where D0 is odd
7316 // - P is the multiplicative inverse of D0 modulo 2^W
7317 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7318 // - Q = floor((2 * A) / (2^K))
7319 // where W is the width of the common type of N and D.
7320 //
7321 // When D is a power of two (and thus D0 is 1), the normal
7322 // formula for A and Q don't apply, because the derivation
7323 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7324 // does not apply. This specifically fails when N = INT_MIN.
7325 //
7326 // Instead, for power-of-two D, we use:
7327 // - A = 2^(W-1)
7328 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
7329 // - Q = 2^(W-K) - 1
7330 // |-> Test that the top K bits are zero after rotation
7331 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7332 "Only applicable for (in)equality comparisons.");
7333
7334 SelectionDAG &DAG = DCI.DAG;
7335
7336 EVT VT = REMNode.getValueType();
7337 EVT SVT = VT.getScalarType();
7338 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7339 EVT ShSVT = ShVT.getScalarType();
7340
7341 // If we are after ops legalization, and MUL is unavailable, we can not
7342 // proceed.
7343 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7344 return SDValue();
7345
7346 // TODO: Could support comparing with non-zero too.
7347 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7348 if (!CompTarget || !CompTarget->isZero())
7349 return SDValue();
7350
// Per-lane facts accumulated by the BuildSREMPattern visitor below.
7351 bool HadIntMinDivisor = false;
7352 bool HadOneDivisor = false;
7353 bool AllDivisorsAreOnes = true;
7354 bool HadEvenDivisor = false;
7355 bool NeedToApplyOffset = false;
7356 bool AllDivisorsArePowerOfTwo = true;
7357 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7358
// Computes the P/A/K/Q constants for one divisor lane; returns false to
// abort the whole fold.
7359 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7360 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7361 if (C->isZero())
7362 return false;
7363
7364 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7365
7366 // WARNING: this fold is only valid for positive divisors!
7367 APInt D = C->getAPIntValue();
7368 if (D.isNegative())
7369 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7370
7371 HadIntMinDivisor |= D.isMinSignedValue();
7372
7373 // If all divisors are ones, we will prefer to avoid the fold.
7374 HadOneDivisor |= D.isOne();
7375 AllDivisorsAreOnes &= D.isOne();
7376
7377 // Decompose D into D0 * 2^K
7378 unsigned K = D.countr_zero();
7379 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7380 APInt D0 = D.lshr(K);
7381
7382 if (!D.isMinSignedValue()) {
7383 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7384 // we don't care about this lane in this fold, we'll special-handle it.
7385 HadEvenDivisor |= (K != 0);
7386 }
7387
7388 // D is a power-of-two if D0 is one. This includes INT_MIN.
7389 // If all divisors are power-of-two, we will prefer to avoid the fold.
7390 AllDivisorsArePowerOfTwo &= D0.isOne();
7391
7392 // P = inv(D0, 2^W)
7393 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7394 unsigned W = D.getBitWidth();
7395 APInt P = D0.multiplicativeInverse();
7396 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7397
7398 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7399 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7400 A.clearLowBits(K);
7401
7402 if (!D.isMinSignedValue()) {
7403 // If divisor INT_MIN, then we don't care about this lane in this fold,
7404 // we'll special-handle it.
7405 NeedToApplyOffset |= A != 0;
7406 }
7407
7408 // Q = floor((2 * A) / (2^K))
7409 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7410
// NOTE(review): the assert conditions paired with these two message strings
// (original lines 7411 and 7413) were elided by the extraction.
7412 "We are expecting that A is always less than all-ones for SVT");
7414 "We are expecting that K is always less than all-ones for ShSVT");
7415
7416 // If D was a power of two, apply the alternate constant derivation.
7417 if (D0.isOne()) {
7418 // A = 2^(W-1)
// NOTE(review): line 7419 elided — presumably the assignment realizing
// A = 2^(W-1); confirm upstream.
7420 // - Q = 2^(W-K) - 1
7421 Q = APInt::getAllOnes(W - K).zext(W);
7422 }
7423
7424 // If the divisor is 1 the result can be constant-folded. Likewise, we
7425 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7426 if (D.isOne()) {
7427 // Set P, A and K to a bogus values so we can try to splat them.
7428 P = 0;
7429 A = -1;
7430 K = -1;
7431
7432 // x ?% 1 == 0 <--> true <--> x u<= -1
7433 Q = -1;
7434 }
7435
7436 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7437 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7438 KAmts.push_back(
7439 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K, /*isSigned=*/false,
7440 /*implicitTrunc=*/true),
7441 DL, ShSVT));
7442 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7443 return true;
7444 };
7445
7446 SDValue N = REMNode.getOperand(0);
7447 SDValue D = REMNode.getOperand(1);
7448
7449 // Collect the values from each element.
7450 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7451 return SDValue();
7452
7453 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7454 if (AllDivisorsAreOnes)
7455 return SDValue();
7456
7457 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7458 // since it can be best implemented as a bit test.
7459 if (AllDivisorsArePowerOfTwo)
7460 return SDValue();
7461
// Materialize the collected per-lane constants as DAG nodes, splatting the
// bogus lanes where possible.
7462 SDValue PVal, AVal, KVal, QVal;
7463 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7464 if (HadOneDivisor) {
7465 // Try to turn PAmts into a splat, since we don't care about the values
7466 // that are currently '0'. If we can't, just keep '0'`s.
7468 // Try to turn AAmts into a splat, since we don't care about the
7469 // values that are currently '-1'. If we can't, change them to '0'`s.
7471 DAG.getConstant(0, DL, SVT));
7472 // Try to turn KAmts into a splat, since we don't care about the values
7473 // that are currently '-1'. If we can't, change them to '0'`s.
7475 DAG.getConstant(0, DL, ShSVT));
7476 }
7477
7478 PVal = DAG.getBuildVector(VT, DL, PAmts);
7479 AVal = DAG.getBuildVector(VT, DL, AAmts);
7480 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7481 QVal = DAG.getBuildVector(VT, DL, QAmts);
7482 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7483 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7484 QAmts.size() == 1 &&
7485 "Expected matchUnaryPredicate to return one element for scalable "
7486 "vectors");
7487 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7488 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7489 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7490 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7491 } else {
7492 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7493 PVal = PAmts[0];
7494 AVal = AAmts[0];
7495 KVal = KAmts[0];
7496 QVal = QAmts[0];
7497 }
7498
7499 // (mul N, P)
7500 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7501 Created.push_back(Op0.getNode());
7502
7503 if (NeedToApplyOffset) {
7504 // We need ADD to do this.
7505 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7506 return SDValue();
7507
7508 // (add (mul N, P), A)
7509 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7510 Created.push_back(Op0.getNode());
7511 }
7512
7513 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7514 // divisors as a performance improvement, since rotating by 0 is a no-op.
7515 if (HadEvenDivisor) {
7516 // We need ROTR to do this.
7517 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7518 return SDValue();
7519 // SREM: (rotr (add (mul N, P), A), K)
7520 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7521 Created.push_back(Op0.getNode());
7522 }
7523
7524 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7525 SDValue Fold =
7526 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
// NOTE(review): line 7527 elided — presumably the condition-code operand
// selecting SETULE for SETEQ / SETUGT for SETNE; confirm upstream.
7528
7529 // If we didn't have lanes with INT_MIN divisor, then we're done.
7530 if (!HadIntMinDivisor)
7531 return Fold;
7532
7533 // That fold is only valid for positive divisors. Which effectively means,
7534 // it is invalid for INT_MIN divisors. So if we have such a lane,
7535 // we must fix-up results for said lanes.
7536 assert(VT.isVector() && "Can/should only get here for vectors.");
7537
7538 // NOTE: we avoid letting illegal types through even if we're before legalize
7539 // ops – legalization has a hard time producing good code for the code that
7540 // follows.
7541 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
// NOTE(review): lines 7542-7544 elided — presumably the remaining legality
// checks for the AND/VSELECT nodes built below; confirm upstream.
7545 return SDValue();
7546
7547 Created.push_back(Fold.getNode());
7548
// NOTE(review): the constant operands on lines 7550/7552/7554 were elided —
// presumably signed-min / signed-max / zero of VT; confirm upstream.
7549 SDValue IntMin = DAG.getConstant(
7551 SDValue IntMax = DAG.getConstant(
7553 SDValue Zero =
7555
7556 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7557 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7558 Created.push_back(DivisorIsIntMin.getNode());
7559
7560 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7561 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7562 Created.push_back(Masked.getNode());
7563 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7564 Created.push_back(MaskedIsZero.getNode());
7565
7566 // To produce final result we need to blend 2 vectors: 'SetCC' and
7567 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7568 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7569 // constant-folded, select can get lowered to a shuffle with constant mask.
7570 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7571 MaskedIsZero, Fold);
7572
7573 return Blended;
7574}
7575
// Builds a boolean test for whether the FP input needs the "is zero /
// is denormal" special case: when input denormals are flushed
// (PreserveSign/PositiveZero) it suffices to compare X == 0.0; otherwise it
// tests fabs(X) < smallest-normal so denormal inputs are also caught.
// NOTE(review): the opening signature line (original 7576) was elided by the
// extraction — presumably `SDValue TargetLowering::getSqrtInputTest(SDValue
// Op, SelectionDAG &DAG, ...` — confirm against upstream.
7577 const DenormalMode &Mode,
7578 SDNodeFlags Flags) const {
7579 SDLoc DL(Op);
7580 EVT VT = Op.getValueType();
7581 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7582 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7583
7584 // This is specifically a check for the handling of denormal inputs, not the
7585 // result.
7586 if (Mode.Input == DenormalMode::PreserveSign ||
7587 Mode.Input == DenormalMode::PositiveZero) {
7588 // Test = X == 0.0
7589 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
7590 /*Signaling=*/false, Flags);
7591 }
7592
7593 // Testing it with denormal inputs to avoid wrong estimate.
7594 //
7595 // Test = fabs(X) < SmallestNormal
7596 const fltSemantics &FltSem = VT.getFltSemantics();
7597 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7598 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7599 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op, Flags);
7600 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT, /*Chain=*/{},
7601 /*Signaling=*/false, Flags);
7602}
7603
// Recursively builds the negated form of a floating-point expression (the
// value of fneg(Op)) when that can be done at no extra cost, reporting the
// cost through an out-parameter. Returns a null SDValue when no profitable
// negation exists.
// NOTE(review): the extraction elided the signature start (original line
// 7604, presumably `SDValue TargetLowering::getNegatedExpression(SDValue Op,
// SelectionDAG &DAG,`) and many single-identifier lines throughout (cost
// out-parameter 7606; cost-local declarations such as 7610, 7615, 7649,
// 7664, 7670, 7676, 7687-7688, 7698, 7708, 7721, 7729, 7764, 7769, 7777,
// 7785, 7823, 7834, 7842, 7885, 7897). Restore from upstream before treating
// this listing as compilable.
7605 bool LegalOps, bool OptForSize,
7607 unsigned Depth) const {
7608 // fneg is removable even if it has multiple uses.
7609 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7611 return Op.getOperand(0);
7612 }
7613
7614 // Don't recurse exponentially.
// NOTE(review): line 7615 elided — presumably the recursion-depth guard.
7616 return SDValue();
7617
7618 // Pre-increment recursion depth for use in recursive calls.
7619 ++Depth;
7620 const SDNodeFlags Flags = Op->getFlags();
7621 EVT VT = Op.getValueType();
7622 unsigned Opcode = Op.getOpcode();
7623
7624 // Don't allow anything with multiple uses unless we know it is free.
7625 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7626 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7627 isFPExtFree(VT, Op.getOperand(0).getValueType());
7628 if (!IsFreeExtend)
7629 return SDValue();
7630 }
7631
// Helper to drop a speculatively-built negation that ended up unused.
7632 auto RemoveDeadNode = [&](SDValue N) {
7633 if (N && N.getNode()->use_empty())
7634 DAG.RemoveDeadNode(N.getNode());
7635 };
7636
7637 SDLoc DL(Op);
7638
7639 // Because getNegatedExpression can delete nodes we need a handle to keep
7640 // temporary nodes alive in case the recursion manages to create an identical
7641 // node.
7642 std::list<HandleSDNode> Handles;
7643
7644 switch (Opcode) {
7645 case ISD::ConstantFP: {
7646 // Don't invert constant FP values after legalization unless the target says
7647 // the negated constant is legal.
7648 bool IsOpLegal =
7650 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7651 OptForSize);
7652
7653 if (LegalOps && !IsOpLegal)
7654 break;
7655
7656 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7657 V.changeSign();
7658 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7659
7660 // If we already have the use of the negated floating constant, it is free
7661 // to negate it even it has multiple uses.
7662 if (!Op.hasOneUse() && CFP.use_empty())
7663 break;
7665 return CFP;
7666 }
7667 case ISD::SPLAT_VECTOR: {
7668 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7669 SDValue X = Op.getOperand(0);
7671 break;
7672
7673 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
7674 if (!NegX)
7675 break;
7677 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
7678 }
7679 case ISD::BUILD_VECTOR: {
7680 // Only permit BUILD_VECTOR of constants.
7681 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7682 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7683 }))
7684 break;
7685
7686 bool IsOpLegal =
7689 llvm::all_of(Op->op_values(), [&](SDValue N) {
7690 return N.isUndef() ||
7691 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7692 OptForSize);
7693 });
7694
7695 if (LegalOps && !IsOpLegal)
7696 break;
7697
7699 for (SDValue C : Op->op_values()) {
7700 if (C.isUndef()) {
7701 Ops.push_back(C);
7702 continue;
7703 }
7704 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7705 V.changeSign();
7706 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7707 }
7709 return DAG.getBuildVector(VT, DL, Ops);
7710 }
7711 case ISD::FADD: {
// Negating an fadd flips one operand: only sound without signed zeros.
7712 if (!Flags.hasNoSignedZeros())
7713 break;
7714
7715 // After operation legalization, it might not be legal to create new FSUBs.
7716 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7717 break;
7718 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7719
7720 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7722 SDValue NegX =
7723 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7724 // Prevent this node from being deleted by the next call.
7725 if (NegX)
7726 Handles.emplace_back(NegX);
7727
7728 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7730 SDValue NegY =
7731 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7732
7733 // We're done with the handles.
7734 Handles.clear();
7735
7736 // Negate the X if its cost is less or equal than Y.
7737 if (NegX && (CostX <= CostY)) {
7738 Cost = CostX;
7739 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7740 if (NegY != N)
7741 RemoveDeadNode(NegY);
7742 return N;
7743 }
7744
7745 // Negate the Y if it is not expensive.
7746 if (NegY) {
7747 Cost = CostY;
7748 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7749 if (NegX != N)
7750 RemoveDeadNode(NegX);
7751 return N;
7752 }
7753 break;
7754 }
7755 case ISD::FSUB: {
7756 // We can't turn -(A-B) into B-A when we honor signed zeros.
7757 if (!Flags.hasNoSignedZeros())
7758 break;
7759
7760 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7761 // fold (fneg (fsub 0, Y)) -> Y
7762 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7763 if (C->isZero()) {
7765 return Y;
7766 }
7767
7768 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7770 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7771 }
7772 case ISD::FMUL:
7773 case ISD::FDIV: {
// For mul/div, negating either operand negates the result; pick cheaper.
7774 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7775
7776 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7778 SDValue NegX =
7779 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7780 // Prevent this node from being deleted by the next call.
7781 if (NegX)
7782 Handles.emplace_back(NegX);
7783
7784 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7786 SDValue NegY =
7787 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7788
7789 // We're done with the handles.
7790 Handles.clear();
7791
7792 // Negate the X if its cost is less or equal than Y.
7793 if (NegX && (CostX <= CostY)) {
7794 Cost = CostX;
7795 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7796 if (NegY != N)
7797 RemoveDeadNode(NegY);
7798 return N;
7799 }
7800
7801 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7802 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7803 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7804 break;
7805
7806 // Negate the Y if it is not expensive.
7807 if (NegY) {
7808 Cost = CostY;
7809 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7810 if (NegX != N)
7811 RemoveDeadNode(NegX);
7812 return N;
7813 }
7814 break;
7815 }
7816 case ISD::FMA:
7817 case ISD::FMULADD:
7818 case ISD::FMAD: {
// -(X*Y+Z) requires negating Z and exactly one of X/Y.
7819 if (!Flags.hasNoSignedZeros())
7820 break;
7821
7822 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7824 SDValue NegZ =
7825 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7826 // Give up if fail to negate the Z.
7827 if (!NegZ)
7828 break;
7829
7830 // Prevent this node from being deleted by the next two calls.
7831 Handles.emplace_back(NegZ);
7832
7833 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7835 SDValue NegX =
7836 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7837 // Prevent this node from being deleted by the next call.
7838 if (NegX)
7839 Handles.emplace_back(NegX);
7840
7841 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7843 SDValue NegY =
7844 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7845
7846 // We're done with the handles.
7847 Handles.clear();
7848
7849 // Negate the X if its cost is less or equal than Y.
7850 if (NegX && (CostX <= CostY)) {
7851 Cost = std::min(CostX, CostZ);
7852 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7853 if (NegY != N)
7854 RemoveDeadNode(NegY);
7855 return N;
7856 }
7857
7858 // Negate the Y if it is not expensive.
7859 if (NegY) {
7860 Cost = std::min(CostY, CostZ);
7861 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7862 if (NegX != N)
7863 RemoveDeadNode(NegX);
7864 return N;
7865 }
7866 break;
7867 }
7868
7869 case ISD::FP_EXTEND:
7870 case ISD::FSIN:
// These are odd functions of their input: fneg commutes through them.
7871 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7872 OptForSize, Cost, Depth))
7873 return DAG.getNode(Opcode, DL, VT, NegV);
7874 break;
7875 case ISD::FP_ROUND:
7876 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7877 OptForSize, Cost, Depth))
7878 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7879 break;
7880 case ISD::SELECT:
7881 case ISD::VSELECT: {
7882 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7883 // iff at least one cost is cheaper and the other is neutral/cheaper
7884 SDValue LHS = Op.getOperand(1);
7886 SDValue NegLHS =
7887 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7888 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7889 RemoveDeadNode(NegLHS);
7890 break;
7891 }
7892
7893 // Prevent this node from being deleted by the next call.
7894 Handles.emplace_back(NegLHS);
7895
7896 SDValue RHS = Op.getOperand(2);
7898 SDValue NegRHS =
7899 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7900
7901 // We're done with the handles.
7902 Handles.clear();
7903
7904 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7905 (CostLHS != NegatibleCost::Cheaper &&
7906 CostRHS != NegatibleCost::Cheaper)) {
7907 RemoveDeadNode(NegLHS);
7908 RemoveDeadNode(NegRHS);
7909 break;
7910 }
7911
7912 Cost = std::min(CostLHS, CostRHS);
7913 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7914 }
7915 }
7916
// No pattern matched: this expression cannot be profitably negated.
7917 return SDValue();
7918}
7919
7920//===----------------------------------------------------------------------===//
7921// Legalization Utilities
7922//===----------------------------------------------------------------------===//
7923
// Expands a full-width MUL / UMUL_LOHI / SMUL_LOHI on type VT into operations
// on the half-width type HiLoVT (schoolbook decomposition into LL/LH/RL/RH
// partial products), pushing the resulting half-width pieces onto Result.
// Returns false when the target offers none of the required half-width
// multiply forms.
// NOTE(review): extraction elided several identifier-only lines (e.g. 7926
// — presumably the `SmallVectorImpl<SDValue> &Result` parameter — plus the
// legality-query halves of 7934/7936/7938/7940, 7971, 8011-8012, 8059).
// Restore from upstream before treating this listing as compilable.
7924bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7925 SDValue LHS, SDValue RHS,
7927 EVT HiLoVT, SelectionDAG &DAG,
7928 MulExpansionKind Kind, SDValue LL,
7929 SDValue LH, SDValue RL, SDValue RH) const {
7930 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7931 Opcode == ISD::SMUL_LOHI);
7932
// Which half-width multiply-high forms the target provides (always "yes"
// when the caller forces expansion via MulExpansionKind::Always).
7933 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7935 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7937 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7939 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7941
7942 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7943 return false;
7944
7945 unsigned OuterBitSize = VT.getScalarSizeInBits();
7946 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7947
7948 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7949 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7950 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7951
// Emits one half-width L*R producing Lo/Hi, preferring the fused *MUL_LOHI
// node and falling back to separate MUL + MULH.
7952 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7953 bool Signed) -> bool {
7954 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7955 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7956 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7957 Hi = Lo.getValue(1);
7958 return true;
7959 }
7960 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7961 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7962 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7963 return true;
7964 }
7965 return false;
7966 };
7967
7968 SDValue Lo, Hi;
7969
// Derive the low halves by truncation when the caller didn't supply them.
7970 if (!LL.getNode() && !RL.getNode() &&
7972 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7973 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7974 }
7975
7976 if (!LL.getNode())
7977 return false;
7978
// Fast path: both operands fit in the low half (zero-extended inputs).
7979 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7980 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7981 DAG.MaskedValueIsZero(RHS, HighMask)) {
7982 // The inputs are both zero-extended.
7983 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7984 Result.push_back(Lo);
7985 Result.push_back(Hi);
7986 if (Opcode != ISD::MUL) {
7987 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7988 Result.push_back(Zero);
7989 Result.push_back(Zero);
7990 }
7991 return true;
7992 }
7993 }
7994
7995 if (!VT.isVector() && Opcode == ISD::MUL &&
7996 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7997 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7998 // The input values are both sign-extended.
7999 // TODO non-MUL case?
8000 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
8001 Result.push_back(Lo);
8002 Result.push_back(Hi);
8003 return true;
8004 }
8005 }
8006
8007 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
8008 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
8009
// Derive the high halves by shift+truncate when the caller didn't supply
// them.
8010 if (!LH.getNode() && !RH.getNode() &&
8013 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
8014 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
8015 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
8016 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
8017 }
8018
8019 if (!LH.getNode())
8020 return false;
8021
8022 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
8023 return false;
8024
8025 Result.push_back(Lo);
8026
// Plain MUL only needs the low OuterBitSize bits: fold the cross terms
// straight into Hi and stop.
8027 if (Opcode == ISD::MUL) {
8028 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
8029 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
8030 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
8031 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
8032 Result.push_back(Hi);
8033 return true;
8034 }
8035
8036 // Compute the full width result.
8037 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
8038 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
8039 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8040 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
8041 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
8042 };
8043
8044 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8045 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
8046 return false;
8047
8048 // This is effectively the add part of a multiply-add of half-sized operands,
8049 // so it cannot overflow.
8050 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8051
8052 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
8053 return false;
8054
8055 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
8056 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8057
// Prefer the glue-based ADDC/ADDE carry chain when legal, else the
// boolean-carry UADDO_CARRY form.
8058 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
8060 if (UseGlue)
8061 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
8062 Merge(Lo, Hi));
8063 else
8064 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
8065 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
8066
8067 SDValue Carry = Next.getValue(1);
8068 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8069 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8070
8071 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8072 return false;
8073
8074 if (UseGlue)
8075 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
8076 Carry);
8077 else
8078 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
8079 Zero, Carry);
8080
8081 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8082
// Signed correction: subtract the other operand's low half wherever a high
// half was negative.
8083 if (Opcode == ISD::SMUL_LOHI) {
8084 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8085 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
8086 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
8087
8088 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8089 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
8090 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
8091 }
8092
8093 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8094 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8095 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8096 return true;
8097}
8098
8100 SelectionDAG &DAG, MulExpansionKind Kind,
8101 SDValue LL, SDValue LH, SDValue RL,
8102 SDValue RH) const {
8104 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
8105 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
8106 DAG, Kind, LL, LH, RL, RH);
8107 if (Ok) {
8108 assert(Result.size() == 2);
8109 Lo = Result[0];
8110 Hi = Result[1];
8111 }
8112 return Ok;
8113}
8114
8115// Optimize unsigned division or remainder by constants for types twice as large
8116// as a legal VT.
8117//
8118// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8119// can be computed
8120// as:
8121// Sum = __builtin_uadd_overflow(Lo, High, &Sum);
8122// Remainder = Sum % Constant;
8123//
8124// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8125// W such that W != (BitWidth / 2) and (1 << W) % Constant == 1. We can break
8126// High:Low into 3 chunks of W bits and compute remainder as
8127// Sum = Chunk0 + Chunk1 + Chunk2;
8128// Remainder = Sum % Constant;
8129//
8130// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8131//
8132// For division, we can compute the remainder using the algorithm described
8133// above, subtract it from the dividend to get an exact multiple of Constant.
8134// Then multiply that exact multiply by the multiplicative inverse modulo
8135// (1 << (BitWidth / 2)) to get the quotient.
8136
8137// If Constant is even, we can shift right the dividend and the divisor by the
8138// number of trailing zeros in Constant before applying the remainder algorithm.
8139// If we're after the quotient, we can subtract this value from the shifted
8140// dividend and multiply by the multiplicative inverse of the shifted divisor.
8141// If we want the remainder, we shift the value left by the number of trailing
8142// zeros and add the bits that were shifted out of the dividend.
8145 EVT HiLoVT, SelectionDAG &DAG,
8146 SDValue LL, SDValue LH) const {
8147 unsigned Opcode = N->getOpcode();
8148 EVT VT = N->getValueType(0);
8149
8150 // TODO: Support signed division/remainder.
8151 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8152 return false;
8153 assert(
8154 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8155 "Unexpected opcode");
8156
8157 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8158 if (!CN)
8159 return false;
8160
8161 APInt Divisor = CN->getAPIntValue();
8162 unsigned BitWidth = Divisor.getBitWidth();
8163 unsigned HBitWidth = BitWidth / 2;
8165 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8166
8167 // Divisor needs to less than (1 << HBitWidth).
8168 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8169 if (Divisor.uge(HalfMaxPlus1))
8170 return false;
8171
8172 // We depend on the UREM by constant optimization in DAGCombiner that requires
8173 // high multiply.
8174 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8176 return false;
8177
8178 // Don't expand if optimizing for size.
8179 if (DAG.shouldOptForSize())
8180 return false;
8181
8182 // Early out for 0 or 1 divisors.
8183 if (Divisor.ule(1))
8184 return false;
8185
8186 // If the divisor is even, shift it until it becomes odd.
8187 unsigned TrailingZeros = 0;
8188 if (!Divisor[0]) {
8189 TrailingZeros = Divisor.countr_zero();
8190 Divisor.lshrInPlace(TrailingZeros);
8191 }
8192
8193 // Look for the largest chunk width W such that (1 << W) % Divisor == 1.
8194 unsigned BestChunkWidth = 0;
8195 for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
8196 APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);
8197
8198 if (!Mod.isOne())
8199 continue;
8200
8201 // If best chunk is HBitWidth, we can use it and handle the carry out.
8202 // Otherwise, ensure the sum won't overflow HiLoVT (HBitWidth).
8203 // Summing N chunks adds ceil(log2(N)) extra carry bits to the width.
8204 // Safety check: Base Chunk Width (I) + Carry Bits <= Register Width.
8205 unsigned NumChunks = divideCeil(BitWidth, I);
8206 if (I == HBitWidth || I + llvm::bit_width(NumChunks - 1) <= HBitWidth) {
8207 BestChunkWidth = I;
8208 break;
8209 }
8210 }
8211
8212 // If we didn't find a chunk size, exit.
8213 if (!BestChunkWidth)
8214 return false;
8215
8216 SDLoc dl(N);
8217
8218 assert(!LL == !LH && "Expected both input halves or no input halves!");
8219 if (!LL)
8220 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8221
8222 bool HasFSHR = isOperationLegal(ISD::FSHR, HiLoVT);
8223
8224 // Shift the input by the number of TrailingZeros in the divisor. The
8225 // shifted out bits will be added to the remainder later.
8226 SDValue PartialRem;
8227 if (TrailingZeros) {
8228 // Save the shifted off bits if we need the remainder.
8229 if (Opcode != ISD::UDIV) {
8230 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8231 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8232 DAG.getConstant(Mask, dl, HiLoVT));
8233 }
8234
8235 if (HasFSHR)
8236 LL = DAG.getNode(ISD::FSHR, dl, HiLoVT, LH, LL,
8237 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8238 else
8239 LL = DAG.getNode(
8240 ISD::OR, dl, HiLoVT,
8241 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8242 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
8243 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8244 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
8245 HiLoVT, dl)));
8246 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8247 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8248 }
8249
8250 SDValue Sum;
8251 // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
8252 // out, add that to the final sum.
8253 if (BestChunkWidth == HBitWidth) {
8254 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8255 EVT SetCCType =
8256 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8258 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8259 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8260 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8261 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8262 } else {
8263 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8264 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8265 // If the boolean for the target is 0 or 1, we can add the setcc result
8266 // directly.
8267 if (getBooleanContents(HiLoVT) ==
8269 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8270 else
8271 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8272 DAG.getConstant(0, dl, HiLoVT));
8273 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8274 }
8275 } else {
8276 // Otherwise split into multple chunks and add them together. We chose
8277 // BestChunkWidth so that the sum will not overflow.
8278 SDValue Mask = DAG.getConstant(
8279 APInt::getLowBitsSet(HBitWidth, BestChunkWidth), dl, HiLoVT);
8280
8281 for (unsigned I = 0; I < BitWidth; I += BestChunkWidth) {
8282 SDValue Chunk;
8283 if (I == 0) {
8284 Chunk = LL;
8285 } else if (I >= HBitWidth) {
8286 Chunk =
8287 DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
8288 DAG.getShiftAmountConstant(I - HBitWidth, HiLoVT, dl));
8289 } else if (HasFSHR) {
8290 Chunk = DAG.getNode(ISD::FSHR, dl, HiLoVT, LH, LL,
8291 DAG.getShiftAmountConstant(I, HiLoVT, dl));
8292 } else {
8293 Chunk = DAG.getNode(
8294 ISD::OR, dl, HiLoVT,
8295 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
8296 DAG.getShiftAmountConstant(I, HiLoVT, dl)),
8297 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
8298 DAG.getShiftAmountConstant(HBitWidth - I, HiLoVT, dl)));
8299 }
8300
8301 // For the last chunk, we might not need a mask if it's smaller than
8302 // BestChunkWidth, but applying it is always safe.
8303 Chunk = DAG.getNode(ISD::AND, dl, HiLoVT, Chunk, Mask);
8304 if (!Sum)
8305 Sum = Chunk;
8306 else
8307 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Chunk);
8308 }
8309 }
8310
8311 // Perform a HiLoVT urem on the Sum using truncated divisor.
8312 SDValue RemL =
8313 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8314 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8315 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8316
8317 if (Opcode != ISD::UREM) {
8318 // Subtract the remainder from the shifted dividend.
8319 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8320 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8321
8322 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8323
8324 // Multiply by the multiplicative inverse of the divisor modulo
8325 // (1 << BitWidth).
8326 APInt MulFactor = Divisor.multiplicativeInverse();
8327
8328 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8329 DAG.getConstant(MulFactor, dl, VT));
8330
8331 // Split the quotient into low and high parts.
8332 SDValue QuotL, QuotH;
8333 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8334 Result.push_back(QuotL);
8335 Result.push_back(QuotH);
8336 }
8337
8338 if (Opcode != ISD::UDIV) {
8339 // If we shifted the input, shift the remainder left and add the bits we
8340 // shifted off the input. This add does not overflow.
8341 if (TrailingZeros) {
8342 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8343 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8344
8345 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
8346 }
8347 Result.push_back(RemL);
8348 Result.push_back(RemH);
8349 }
8350
8351 return true;
8352}
8353
8354// Check that (every element of) Z is undef or not an exact multiple of BW.
8355static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
8357 Z,
8358 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8359 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8360}
8361
8363 EVT VT = Node->getValueType(0);
8364 SDValue ShX, ShY;
8365 SDValue ShAmt, InvShAmt;
8366 SDValue X = Node->getOperand(0);
8367 SDValue Y = Node->getOperand(1);
8368 SDValue Z = Node->getOperand(2);
8369 SDValue Mask = Node->getOperand(3);
8370 SDValue VL = Node->getOperand(4);
8371
8372 unsigned BW = VT.getScalarSizeInBits();
8373 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8374 SDLoc DL(SDValue(Node, 0));
8375
8376 EVT ShVT = Z.getValueType();
8377 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8378 // fshl: X << C | Y >> (BW - C)
8379 // fshr: X << (BW - C) | Y >> C
8380 // where C = Z % BW is not zero
8381 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8382 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8383 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8384 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8385 VL);
8386 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8387 VL);
8388 } else {
8389 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8390 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8391 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8392 if (isPowerOf2_32(BW)) {
8393 // Z % BW -> Z & (BW - 1)
8394 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8395 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8396 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8397 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8398 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8399 } else {
8400 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8401 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8402 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8403 }
8404
8405 SDValue One = DAG.getConstant(1, DL, ShVT);
8406 if (IsFSHL) {
8407 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8408 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8409 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8410 } else {
8411 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8412 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8413 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8414 }
8415 }
8416 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8417}
8418
8420 SelectionDAG &DAG) const {
8421 if (Node->isVPOpcode())
8422 return expandVPFunnelShift(Node, DAG);
8423
8424 EVT VT = Node->getValueType(0);
8425
8426 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8430 return SDValue();
8431
8432 SDValue X = Node->getOperand(0);
8433 SDValue Y = Node->getOperand(1);
8434 SDValue Z = Node->getOperand(2);
8435
8436 unsigned BW = VT.getScalarSizeInBits();
8437 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8438 SDLoc DL(SDValue(Node, 0));
8439
8440 EVT ShVT = Z.getValueType();
8441
8442 // If a funnel shift in the other direction is more supported, use it.
8443 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8444 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8445 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8446 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8447 // fshl X, Y, Z -> fshr X, Y, -Z
8448 // fshr X, Y, Z -> fshl X, Y, -Z
8449 Z = DAG.getNegative(Z, DL, ShVT);
8450 } else {
8451 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8452 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8453 SDValue One = DAG.getConstant(1, DL, ShVT);
8454 if (IsFSHL) {
8455 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8456 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8457 } else {
8458 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8459 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8460 }
8461 Z = DAG.getNOT(DL, Z, ShVT);
8462 }
8463 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8464 }
8465
8466 SDValue ShX, ShY;
8467 SDValue ShAmt, InvShAmt;
8468 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8469 // fshl: X << C | Y >> (BW - C)
8470 // fshr: X << (BW - C) | Y >> C
8471 // where C = Z % BW is not zero
8472 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8473 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8474 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8475 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8476 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8477 } else {
8478 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8479 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8480 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8481 if (isPowerOf2_32(BW)) {
8482 // Z % BW -> Z & (BW - 1)
8483 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8484 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8485 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8486 } else {
8487 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8488 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8489 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8490 }
8491
8492 SDValue One = DAG.getConstant(1, DL, ShVT);
8493 if (IsFSHL) {
8494 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8495 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8496 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8497 } else {
8498 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8499 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8500 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8501 }
8502 }
8503 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8504}
8505
8506// TODO: Merge with expandFunnelShift.
8508 SelectionDAG &DAG) const {
8509 EVT VT = Node->getValueType(0);
8510 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8511 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8512 SDValue Op0 = Node->getOperand(0);
8513 SDValue Op1 = Node->getOperand(1);
8514 SDLoc DL(SDValue(Node, 0));
8515
8516 EVT ShVT = Op1.getValueType();
8517 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8518
8519 // If a rotate in the other direction is more supported, use it.
8520 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8521 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8522 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8523 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8524 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8525 }
8526
8527 if (!AllowVectorOps && VT.isVector() &&
8533 return SDValue();
8534
8535 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8536 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8537 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8538 SDValue ShVal;
8539 SDValue HsVal;
8540 if (isPowerOf2_32(EltSizeInBits)) {
8541 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8542 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8543 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8544 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8545 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8546 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8547 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8548 } else {
8549 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8550 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8551 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8552 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8553 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8554 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8555 SDValue One = DAG.getConstant(1, DL, ShVT);
8556 HsVal =
8557 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8558 }
8559 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8560}
8561
8562/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8563/// a chain of halving decompositions (halving element width) and/or vector
8564/// widening (doubling element count). This guides expansion strategy selection:
8565/// if true, the halving/widening path produces better code than bit-by-bit.
8566///
8567/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8568/// Widening steps are cheap (O(1) pad/extract) and don't count.
8569/// Limiting halvings to 2 prevents exponential blowup:
8570/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8571/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8572/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
8574 EVT VT, unsigned HalveDepth = 0,
8575 unsigned TotalDepth = 0) {
8576 if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8577 return false;
8579 return true;
8580 if (!TLI.isTypeLegal(VT))
8581 return false;
8582
8583 unsigned BW = VT.getScalarSizeInBits();
8584
8585 // Halve: halve element width, same element count.
8586 // This is the expensive step -- each halving creates ~4x more operations.
8587 if (BW % 2 == 0) {
8588 EVT HalfEltVT = EVT::getIntegerVT(Ctx, BW / 2);
8589 EVT HalfVT = VT.changeVectorElementType(Ctx, HalfEltVT);
8590 if (TLI.isTypeLegal(HalfVT) &&
8591 canNarrowCLMULToLegal(TLI, Ctx, HalfVT, HalveDepth + 1, TotalDepth + 1))
8592 return true;
8593 }
8594
8595 // Widen: double element count (fixed-width vectors only).
8596 // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
8597 EVT WideVT = VT.getDoubleNumVectorElementsVT(Ctx);
8598 if (TLI.isTypeLegal(WideVT) &&
8599 canNarrowCLMULToLegal(TLI, Ctx, WideVT, HalveDepth, TotalDepth + 1))
8600 return true;
8601
8602 return false;
8603}
8604
8606 SDLoc DL(Node);
8607 EVT VT = Node->getValueType(0);
8608 SDValue X = Node->getOperand(0);
8609 SDValue Y = Node->getOperand(1);
8610 unsigned BW = VT.getScalarSizeInBits();
8611 unsigned Opcode = Node->getOpcode();
8612 LLVMContext &Ctx = *DAG.getContext();
8613
8614 switch (Opcode) {
8615 case ISD::CLMUL: {
8616 // For vector types, try decomposition strategies that leverage legal
8617 // CLMUL on narrower or wider element types, avoiding the expensive
8618 // bit-by-bit expansion.
8619 if (VT.isVector()) {
8620 // Strategy 1: Halving decomposition to half-element-width CLMUL.
8621 // Applies ExpandIntRes_CLMUL's identity element-wise:
8622 // CLMUL(X, Y) = (Hi << HalfBW) | Lo
8623 // where:
8624 // Lo = CLMUL(XLo, YLo)
8625 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8626 unsigned HalfBW = BW / 2;
8627 if (BW % 2 == 0) {
8628 EVT HalfEltVT = EVT::getIntegerVT(Ctx, HalfBW);
8629 EVT HalfVT =
8630 EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorElementCount());
8631 if (isTypeLegal(HalfVT) && canNarrowCLMULToLegal(*this, Ctx, HalfVT,
8632 /*HalveDepth=*/1)) {
8633 SDValue ShAmt = DAG.getShiftAmountConstant(HalfBW, VT, DL);
8634
8635 // Extract low and high halves of each element.
8636 SDValue XLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, X);
8637 SDValue XHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8638 DAG.getNode(ISD::SRL, DL, VT, X, ShAmt));
8639 SDValue YLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Y);
8640 SDValue YHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8641 DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt));
8642
8643 // Lo = CLMUL(XLo, YLo)
8644 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YLo);
8645
8646 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8647 SDValue LoH = DAG.getNode(ISD::CLMULH, DL, HalfVT, XLo, YLo);
8648 SDValue Cross1 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YHi);
8649 SDValue Cross2 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XHi, YLo);
8650 SDValue Cross = DAG.getNode(ISD::XOR, DL, HalfVT, Cross1, Cross2);
8651 SDValue Hi = DAG.getNode(ISD::XOR, DL, HalfVT, LoH, Cross);
8652
8653 // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
8654 SDValue LoExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo);
8655 SDValue HiExt = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Hi);
8656 SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, HiExt, ShAmt);
8657 return DAG.getNode(ISD::OR, DL, VT, LoExt, HiShifted);
8658 }
8659 }
8660
8661 // Strategy 2: Promote to double-element-width CLMUL.
8662 // CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8663 {
8664 EVT ExtVT = VT.changeElementType(Ctx, EVT::getIntegerVT(Ctx, 2 * BW));
8665 if (isTypeLegal(ExtVT) && isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8666 // If CLMUL on ExtVT is Custom (not Legal), the target may
8667 // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8668 // fallback costs O(BW) vectorized iterations. Only widen when
8669 // element count is small enough that scalarization is cheaper.
8670 unsigned NumElts = VT.getVectorMinNumElements();
8671 if (isOperationLegal(ISD::CLMUL, ExtVT) || NumElts < BW) {
8672 SDValue XExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, X);
8673 SDValue YExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, Y);
8674 SDValue Mul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8675 return DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
8676 }
8677 }
8678 }
8679
8680 // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8681 // vector, extract lower result). CLMUL is element-wise, so upper
8682 // (undef) lanes don't affect the lower results.
8683 // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8684 if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8685 EVT WideVT = EVT::getVectorVT(Ctx, VT.getVectorElementType(), EC * 2);
8686 if (isTypeLegal(WideVT) && canNarrowCLMULToLegal(*this, Ctx, WideVT)) {
8687 SDValue Undef = DAG.getUNDEF(WideVT);
8688 SDValue XWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8689 X, DAG.getVectorIdxConstant(0, DL));
8690 SDValue YWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8691 Y, DAG.getVectorIdxConstant(0, DL));
8692 SDValue WideRes = DAG.getNode(ISD::CLMUL, DL, WideVT, XWide, YWide);
8693 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WideRes,
8694 DAG.getVectorIdxConstant(0, DL));
8695 }
8696 }
8697 }
8698
8699 // NOTE: If you change this expansion, please update the cost model
8700 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8701 // Intrinsic::clmul.
8702
8703 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
8704
8705 SDValue Res = DAG.getConstant(0, DL, VT);
8706 for (unsigned I = 0; I < BW; ++I) {
8707 SDValue ShiftAmt = DAG.getShiftAmountConstant(I, VT, DL);
8708 SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8709 SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8710
8711 // For targets with a fast bit test instruction (e.g., x86 BT) or without
8712 // multiply, use a shift-based expansion to avoid expensive MUL
8713 // instructions.
8714 SDValue Part;
8715 if (!hasBitTest(Y, ShiftAmt) &&
8718 Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8719 } else {
8720 // Canonical bit test: (Y & (1 << I)) != 0
8721 SDValue Zero = DAG.getConstant(0, DL, VT);
8722 SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETEQ);
8723 SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);
8724 Part = DAG.getSelect(DL, VT, Cond, Zero, XShifted);
8725 }
8726 Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
8727 }
8728 return Res;
8729 }
8730 case ISD::CLMULR:
8731 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
8734 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
8735 SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
8736 Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
8737 DAG.getShiftAmountConstant(BW - 1, VT, DL));
8738 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8739 DAG.getShiftAmountConstant(1, VT, DL));
8740 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
8741 }
8742 [[fallthrough]];
8743 case ISD::CLMULH: {
8744 EVT ExtVT = VT.changeElementType(Ctx, EVT::getIntegerVT(Ctx, 2 * BW));
8745 // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
8746 // when any of these hold:
8747 // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
8748 // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
8749 // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
8750 // expanded via halving/widening to reach legal CLMUL. The bitreverse
8751 // path creates CLMUL(VT) which will be expanded efficiently. The
8752 // promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
8753 // causing a cycle.
8754 // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
8755 // => trunc path is preferred over the bitreverse path, as it avoids the
8756 // cost of 3 bitreverse operations.
8761 canNarrowCLMULToLegal(*this, Ctx, VT)))) {
8762 SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
8763 SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
8764 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
8765 SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
8766 if (Opcode == ISD::CLMULH)
8767 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8768 DAG.getShiftAmountConstant(1, VT, DL));
8769 return Res;
8770 }
8771 SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
8772 SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
8773 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8774 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8775 SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
8776 DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
8777 return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
8778 }
8779 }
8780 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8781}
8782
8784 SelectionDAG &DAG) const {
8785 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8786 EVT VT = Node->getValueType(0);
8787 unsigned VTBits = VT.getScalarSizeInBits();
8788 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8789
8790 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8791 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8792 SDValue ShOpLo = Node->getOperand(0);
8793 SDValue ShOpHi = Node->getOperand(1);
8794 SDValue ShAmt = Node->getOperand(2);
8795 EVT ShAmtVT = ShAmt.getValueType();
8796 EVT ShAmtCCVT =
8797 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8798 SDLoc dl(Node);
8799
8800 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8801 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8802 // away during isel.
8803 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8804 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
8805 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8806 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8807 : DAG.getConstant(0, dl, VT);
8808
8809 SDValue Tmp2, Tmp3;
8810 if (IsSHL) {
8811 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8812 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8813 } else {
8814 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8815 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8816 }
8817
8818 // If the shift amount is larger or equal than the width of a part we don't
8819 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8820 // values for large shift amounts.
8821 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8822 DAG.getConstant(VTBits, dl, ShAmtVT));
8823 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8824 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8825
8826 if (IsSHL) {
8827 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8828 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8829 } else {
8830 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8831 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8832 }
8833}
8834
8836 SelectionDAG &DAG) const {
8837 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8838 SDValue Src = Node->getOperand(OpNo);
8839 EVT SrcVT = Src.getValueType();
8840 EVT DstVT = Node->getValueType(0);
8841 SDLoc dl(SDValue(Node, 0));
8842
8843 // FIXME: Only f32 to i64 conversions are supported.
8844 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8845 return false;
8846
8847 if (Node->isStrictFPOpcode())
8848 // When a NaN is converted to an integer a trap is allowed. We can't
8849 // use this expansion here because it would eliminate that trap. Other
8850 // traps are also allowed and cannot be eliminated. See
8851 // IEEE 754-2008 sec 5.8.
8852 return false;
8853
8854 // Expand f32 -> i64 conversion
8855 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8856 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8857 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8858 EVT IntVT = SrcVT.changeTypeToInteger();
8859 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8860
8861 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8862 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8863 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8864 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8865 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8866 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8867
8868 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8869
8870 SDValue ExponentBits = DAG.getNode(
8871 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8872 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8873 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8874
8875 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8876 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8877 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8878 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8879
8880 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8881 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8882 DAG.getConstant(0x00800000, dl, IntVT));
8883
8884 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8885
8886 R = DAG.getSelectCC(
8887 dl, Exponent, ExponentLoBit,
8888 DAG.getNode(ISD::SHL, dl, DstVT, R,
8889 DAG.getZExtOrTrunc(
8890 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8891 dl, IntShVT)),
8892 DAG.getNode(ISD::SRL, dl, DstVT, R,
8893 DAG.getZExtOrTrunc(
8894 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8895 dl, IntShVT)),
8896 ISD::SETGT);
8897
8898 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8899 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8900
8901 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8902 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8903 return true;
8904}
8905
// TargetLowering::expandFP_TO_UINT — NOTE(review): this is a doxygen listing;
// the function's opening signature line (original line 8906) and a few interior
// lines (8922, 8924, 8933, 8945) are missing from this dump.
//
// Lowers an (optionally strict) FP_TO_UINT node in terms of FP_TO_SINT.
// If the float type cannot even represent the destination's sign-bit value,
// FP_TO_SINT alone is exact. Otherwise the result is built either as a
// branchless offset form (strict / shouldUseStrictFP_TO_INT) or as a
// select between a direct conversion and a bias-adjusted conversion.
// On success writes Result (and Chain for strict nodes) and returns true.
8907 SDValue &Chain,
8908 SelectionDAG &DAG) const {
8909 SDLoc dl(SDValue(Node, 0));
8910 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8911 SDValue Src = Node->getOperand(OpNo);
8912
8913 EVT SrcVT = Src.getValueType();
8914 EVT DstVT = Node->getValueType(0);
8915 EVT SetCCVT =
8916 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8917 EVT DstSetCCVT =
8918 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8919
8920 // Only expand vector types if we have the appropriate vector bit operations.
8921 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
// NOTE(review): original line 8922 (the non-strict opcode alternative) and
// 8924 (second half of the legality condition) are absent from this listing.
8923 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8925 return false;
8926
8927 // If the maximum float value is smaller then the signed integer range,
8928 // the destination signmask can't be represented by the float, so we can
8929 // just use FP_TO_SINT directly.
8930 const fltSemantics &APFSem = SrcVT.getFltSemantics();
8931 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8932 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
// NOTE(review): original line 8933 (the condition head testing the result of
// convertFromAPInt) is absent from this listing.
8934 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8935 if (Node->isStrictFPOpcode()) {
8936 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8937 { Node->getOperand(0), Src });
8938 Chain = Result.getValue(1);
8939 } else
8940 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8941 return true;
8942 }
8943
8944 // Don't expand it if there isn't cheap fsub instruction.
// NOTE(review): original line 8945 (the `if (!isOperationLegalOrCustom(`
// head of this condition) is absent from this listing.
8946 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8947 return false;
8948
// Cst is the FP value of the destination sign mask (2^(DstBits-1)).
8949 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8950 SDValue Sel;
8951
// Sel = (Src < 2^(DstBits-1)); strict nodes use a signaling compare and
// thread the chain through the setcc.
8952 if (Node->isStrictFPOpcode()) {
8953 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8954 Node->getOperand(0), /*IsSignaling*/ true);
8955 Chain = Sel.getValue(1);
8956 } else {
8957 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8958 }
8959
8960 bool Strict = Node->isStrictFPOpcode() ||
8961 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8962
8963 if (Strict) {
8964 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8965 // signmask then offset (the result of which should be fully representable).
8966 // Sel = Src < 0x8000000000000000
8967 // FltOfs = select Sel, 0, 0x8000000000000000
8968 // IntOfs = select Sel, 0, 0x8000000000000000
8969 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8970
8971 // TODO: Should any fast-math-flags be set for the FSUB?
8972 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8973 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8974 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8975 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8976 DAG.getConstant(0, dl, DstVT),
8977 DAG.getConstant(SignMask, dl, DstVT));
8978 SDValue SInt;
8979 if (Node->isStrictFPOpcode()) {
8980 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8981 { Chain, Src, FltOfs });
8982 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8983 { Val.getValue(1), Val });
8984 Chain = SInt.getValue(1);
8985 } else {
8986 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8987 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8988 }
8989 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8990 } else {
8991 // Expand based on maximum range of FP_TO_SINT:
8992 // True = fp_to_sint(Src)
8993 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8994 // Result = select (Src < 0x8000000000000000), True, False
8995
8996 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8997 // TODO: Should any fast-math-flags be set for the FSUB?
8998 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8999 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
9000 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
9001 DAG.getConstant(SignMask, dl, DstVT));
9002 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9003 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
9004 }
9005 return true;
9006}
9007
// TargetLowering::expandUINT_TO_FP — NOTE(review): doxygen listing; the
// opening signature line (original 9008) and lines 9023, 9035-9038 (parts of
// two conditions) are missing from this dump.
//
// Expands i64 -> f64 UINT_TO_FP using the split-halves algorithm from
// compiler-rt's __floatundidf. Refuses strict-FP nodes because converting 0
// under round-toward-negative-infinity would yield -0.0. On success writes
// Result and returns true.
9009 SDValue &Chain, SelectionDAG &DAG) const {
9010 // This transform is not correct for converting 0 when rounding mode is set
9011 // to round toward negative infinity which will produce -0.0. So disable
9012 // under strictfp.
9013 if (Node->isStrictFPOpcode())
9014 return false;
9015
9016 SDValue Src = Node->getOperand(0);
9017 EVT SrcVT = Src.getValueType();
9018 EVT DstVT = Node->getValueType(0);
9019
9020 // If the input is known to be non-negative and SINT_TO_FP is legal then use
9021 // it.
9022 if (Node->getFlags().hasNonNeg() &&
// NOTE(review): original line 9023 (the SINT_TO_FP legality test closing
// this condition) is absent from this listing.
9024 Result =
9025 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
9026 return true;
9027 }
9028
9029 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
9030 return false;
9031
9032 // Only expand vector types if we have the appropriate vector bit
9033 // operations.
9034 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
// NOTE(review): original lines 9035-9038 (remaining legality checks of this
// condition) are absent from this listing.
9039 return false;
9040
9041 SDLoc dl(SDValue(Node, 0));
9042
9043 // Implementation of unsigned i64 to f64 following the algorithm in
9044 // __floatundidf in compiler_rt. This implementation performs rounding
9045 // correctly in all rounding modes with the exception of converting 0
9046 // when rounding toward negative infinity. In that case the fsub will
9047 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
9048 // incorrect.
// Bit patterns: 0x433...0 is 2^52 as a double's exponent field, 0x453...0 is
// 2^84; OR-ing the integer halves into these mantissas makes exact doubles.
9049 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
9050 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
9051 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
9052 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
9053 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
9054 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
9055
// Split Src into low/high 32-bit halves, embed each into a double, then
// subtract the combined bias and add the halves back together.
9056 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
9057 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
9058 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
9059 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
9060 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
9061 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
9062 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
9063 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
9064 return true;
9065}
9066
// TargetLowering::createSelectForFMINNUM_FMAXNUM — NOTE(review): doxygen
// listing; the line carrying the function name (original 9068) and line 9079
// (second half of the legality condition) are missing from this dump.
//
// When the node carries the no-NaNs flag, lowers fminnum/fmaxnum (and their
// strict variants) to select_cc(Op1 </> Op2, Op1, Op2), provided the
// required condition code is usable for the type. Returns an empty SDValue
// when the transform does not apply.
9067SDValue
9069 SelectionDAG &DAG) const {
9070 unsigned Opcode = Node->getOpcode();
9071 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
9072 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
9073 "Wrong opcode");
9074
9075 if (Node->getFlags().hasNoNaNs()) {
// With no NaNs, min is simply "a < b ? a : b" and max is "a > b ? a : b".
9076 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
9077 EVT VT = Node->getValueType(0);
9078 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
9080 VT.isVector())
9081 return SDValue();
9082 SDValue Op1 = Node->getOperand(0);
9083 SDValue Op2 = Node->getOperand(1);
9084 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
9085 Node->getFlags());
9086 }
9087
9088 return SDValue();
9089}
9090
// TargetLowering::expandFMINNUM_FMAXNUM — NOTE(review): doxygen listing;
// the opening signature line (original 9091) and lines 9098, 9103, 9137 are
// missing from this dump.
//
// Expands fminnum/fmaxnum by trying, in order: vector splitting, the
// FMINNUM_IEEE/FMAXNUM_IEEE form (with canonicalizes to quiet possible
// sNaNs), FMINIMUM/FMAXIMUM when NaNs are provably absent, and finally a
// plain select_cc. Returns an empty SDValue if nothing applies.
9092 SelectionDAG &DAG) const {
9093 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
9094 return Expanded;
9095
9096 EVT VT = Node->getValueType(0);
9097 if (VT.isScalableVector())
9099 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
9100
9101 SDLoc dl(Node);
// NOTE(review): original line 9103 (the IEEE-opcode selection expression)
// is absent from this listing.
9102 unsigned NewOp =
9104
9105 if (isOperationLegalOrCustom(NewOp, VT)) {
9106 SDValue Quiet0 = Node->getOperand(0);
9107 SDValue Quiet1 = Node->getOperand(1);
9108
9109 if (!Node->getFlags().hasNoNaNs()) {
9110 // Insert canonicalizes if it's possible we need to quiet to get correct
9111 // sNaN behavior.
9112 if (!DAG.isKnownNeverSNaN(Quiet0)) {
9113 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
9114 Node->getFlags());
9115 }
9116 if (!DAG.isKnownNeverSNaN(Quiet1)) {
9117 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
9118 Node->getFlags());
9119 }
9120 }
9121
9122 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
9123 }
9124
9125 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
9126 // instead if there are no NaNs.
9127 if (Node->getFlags().hasNoNaNs() ||
9128 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
9129 DAG.isKnownNeverNaN(Node->getOperand(1)))) {
9130 unsigned IEEE2018Op =
9131 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
9132 if (isOperationLegalOrCustom(IEEE2018Op, VT))
9133 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
9134 Node->getOperand(1), Node->getFlags());
9135 }
9136
// NOTE(review): original line 9137 (the `if (SDValue SelCC = ...)` head for
// createSelectForFMINNUM_FMAXNUM) is absent from this listing.
9138 return SelCC;
9139
9140 return SDValue();
9141}
9142
// TargetLowering::expandFMINIMUM_FMAXIMUM — NOTE(review): doxygen listing;
// the opening signature line (original 9143) and lines 9159, 9173, 9186 are
// missing from this dump.
//
// Expands fminimum/fmaximum in three steps:
//  1. compute a NaN-ignoring min/max (IEEE form, plain FMINNUM/FMAXNUM, or
//     setcc+select as a last resort);
//  2. force NaN propagation when either operand may be NaN;
//  3. fix up signed-zero ordering (fminimum: -0.0 < +0.0) when required.
9144 SelectionDAG &DAG) const {
9145 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
9146 return Expanded;
9147
9148 SDLoc DL(N);
9149 SDValue LHS = N->getOperand(0);
9150 SDValue RHS = N->getOperand(1);
9151 unsigned Opc = N->getOpcode();
9152 EVT VT = N->getValueType(0);
9153 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9154 bool IsMax = Opc == ISD::FMAXIMUM;
9155 SDNodeFlags Flags = N->getFlags();
9156
9157 // First, implement comparison not propagating NaN. If no native fmin or fmax
9158 // available, use plain select with setcc instead.
// NOTE(review): original line 9159 (the declaration of MinMax) is absent
// from this listing.
9160 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9161 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
9162
9163 // FIXME: We should probably define fminnum/fmaxnum variants with correct
9164 // signed zero behavior.
9165 bool MinMaxMustRespectOrderedZero = false;
9166
9167 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
9168 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
9169 MinMaxMustRespectOrderedZero = true;
9170 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
9171 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
9172 } else {
// NOTE(review): original line 9173 (a vector-legality condition guarding the
// unroll) is absent from this listing.
9174 return DAG.UnrollVectorOp(N);
9175
9176 // NaN (if exists) will be propagated later, so orderness doesn't matter.
9177 SDValue Compare =
9178 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
9179 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
9180 }
9181
9182 // Propagate any NaN of both operands
9183 if (!N->getFlags().hasNoNaNs() &&
9184 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
// NOTE(review): original line 9186 (the qNaN constant argument) is absent
// from this listing.
9185 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
9187 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
9188 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
9189 }
9190
9191 // fminimum/fmaximum requires -0.0 less than +0.0
9192 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
9193 !DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
9194 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9195 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
9196 SDValue TestZero =
9197 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9198 SDValue LCmp = DAG.getSelect(
9199 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
9200 MinMax, Flags);
9201 SDValue RCmp = DAG.getSelect(
9202 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
9203 LCmp, Flags);
9204 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
9205 }
9206
9207 return MinMax;
9208}
9209
// TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM — NOTE(review): doxygen
// listing; the opening signature line (original 9210) and lines 9222, 9244,
// 9261-9262, 9291 are missing from this dump.
//
// Expands fminimumnum/fmaximumnum by trying progressively cheaper targets:
// the IEEE min/max form (quieting possible sNaNs via FCANONICALIZE),
// FMINIMUM/FMAXIMUM when NaNs are provably absent, FMINNUM/FMAXNUM when
// sNaNs and ambiguous ±0.0 are ruled out, and finally a manual expansion:
// replace a NaN operand with the other operand, take a select_cc min/max
// (preferring RHS on equality), then fix up signed-zero ordering.
9211 SelectionDAG &DAG) const {
9212 SDLoc DL(Node);
9213 SDValue LHS = Node->getOperand(0);
9214 SDValue RHS = Node->getOperand(1);
9215 unsigned Opc = Node->getOpcode();
9216 EVT VT = Node->getValueType(0);
9217 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9218 bool IsMax = Opc == ISD::FMAXIMUMNUM;
9219 SDNodeFlags Flags = Node->getFlags();
9220
// NOTE(review): original line 9222 (the opcode-selection expression) is
// absent from this listing.
9221 unsigned NewOp =
9223
9224 if (isOperationLegalOrCustom(NewOp, VT)) {
9225 if (!Flags.hasNoNaNs()) {
9226 // Insert canonicalizes if it's possible we need to quiet to get correct
9227 // sNaN behavior.
9228 if (!DAG.isKnownNeverSNaN(LHS)) {
9229 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
9230 }
9231 if (!DAG.isKnownNeverSNaN(RHS)) {
9232 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
9233 }
9234 }
9235
9236 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
9237 }
9238
9239 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
9240 // same behaviors for all of other cases: +0.0 vs -0.0 included.
9241 if (Flags.hasNoNaNs() ||
9242 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
// NOTE(review): original line 9244 (the FMINIMUM/FMAXIMUM opcode choice) is
// absent from this listing.
9243 unsigned IEEE2019Op =
9245 if (isOperationLegalOrCustom(IEEE2019Op, VT))
9246 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
9247 }
9248
9249 // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
9250 // either one for +0.0 vs -0.0.
9251 if ((Flags.hasNoNaNs() ||
9252 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
9253 (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
9254 DAG.isKnownNeverZeroFloat(RHS))) {
9255 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
9256 if (isOperationLegalOrCustom(IEEE2008Op, VT))
9257 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
9258 }
9259
// NOTE(review): original lines 9261-9262 (remaining legality checks of this
// vector condition) are absent from this listing.
9260 if (VT.isVector() &&
9263 return DAG.UnrollVectorOp(Node);
9264
9265 // If only one operand is NaN, override it with another operand.
9266 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
9267 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
9268 }
9269 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
9270 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
9271 }
9272
9273 // Always prefer RHS if equal.
9274 SDValue MinMax =
9275 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
9276
9277 // TODO: We need quiet sNaN if strictfp.
9278
9279 // Fixup signed zero behavior.
9280 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
9281 DAG.isKnownNeverZeroFloat(RHS)) {
9282 return MinMax;
9283 }
9284 SDValue TestZero =
9285 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
9286 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9287 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
9288 EVT IntVT = VT.changeTypeToInteger();
9289 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
9290 SDValue LHSTrunc = LHS;
// NOTE(review): original line 9291 (the condition guarding this FP_ROUND) is
// absent from this listing.
9292 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
9293 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
9294 }
9295 // It's OK to select from LHS and MinMax, with only one ISD::IS_FPCLASS, as
9296 // we preferred RHS when generate MinMax, if the operands are equal.
9297 SDValue RetZero = DAG.getSelect(
9298 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
9299 MinMax, Flags);
9300 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
9301}
9302
9303/// Returns a true value if this FPClassTest can be performed with an ordered
9304/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
9305/// std::nullopt if it cannot be performed as a compare with 0.
9306static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9307 const fltSemantics &Semantics,
9308 const MachineFunction &MF) {
// Separate the NaN portion of the test from the rest: "zero-class or NaN"
// maps onto an unordered compare, plain "zero-class" onto an ordered one.
9309 FPClassTest OrderedMask = Test & ~fcNan;
9310 FPClassTest NanTest = Test & fcNan;
9311 bool IsOrdered = NanTest == fcNone;
9312 bool IsUnordered = NanTest == fcNan;
9313
9314 // Skip cases that are testing for only a qnan or snan.
9315 if (!IsOrdered && !IsUnordered)
9316 return std::nullopt;
9317
// fcmp-to-0 matches exactly ±0 only if denormal inputs keep IEEE semantics;
// otherwise denormals would also compare equal to zero.
9318 if (OrderedMask == fcZero &&
9319 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
9320 return IsOrdered;
// Conversely, if denormal inputs are flushed to zero, a compare with 0 also
// covers the subnormal class.
9321 if (OrderedMask == (fcZero | fcSubnormal) &&
9322 MF.getDenormalMode(Semantics).inputsAreZero())
9323 return IsOrdered;
9324 return std::nullopt;
9325}
9326
// TargetLowering::expandIS_FPCLASS — NOTE(review): doxygen listing; the
// opening signature line (original 9327) and several interior lines (9358,
// 9395, 9405-9408) are missing from this dump.
//
// Expands an ISD::IS_FPCLASS test. Strategy: handle degenerate masks,
// peel the high double out of ppcf128, try cheap FP-compare forms when FP
// exceptions can be ignored (compare-with-0, self-compare for NaN, fabs==inf
// for inf, fabs<smallest_normal for subnormal|zero, the two-compare normal
// test), then fall back to bit tests on the integer representation, class by
// class, OR-ing partial results and inverting at the end when the inverted
// mask was simpler.
9328 const FPClassTest OrigTestMask,
9329 SDNodeFlags Flags, const SDLoc &DL,
9330 SelectionDAG &DAG) const {
9331 EVT OperandVT = Op.getValueType();
9332 assert(OperandVT.isFloatingPoint());
9333 FPClassTest Test = OrigTestMask;
9334
9335 // Degenerated cases.
9336 if (Test == fcNone)
9337 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
9338 if (Test == fcAllFlags)
9339 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
9340
9341 // PPC double double is a pair of doubles, of which the higher part determines
9342 // the value class.
9343 if (OperandVT == MVT::ppcf128) {
9344 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
9345 DAG.getConstant(1, DL, MVT::i32));
9346 OperandVT = MVT::f64;
9347 }
9348
9349 // Floating-point type properties.
9350 EVT ScalarFloatVT = OperandVT.getScalarType();
9351 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
9352 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
9353 bool IsF80 = (ScalarFloatVT == MVT::f80);
9354
9355 // Some checks can be implemented using float comparisons, if floating point
9356 // exceptions are ignored.
// NOTE(review): original line 9358 (the second half of this condition) is
// absent from this listing.
9357 if (Flags.hasNoFPExcept() &&
9359 FPClassTest FPTestMask = Test;
9360 bool IsInvertedFP = false;
9361
9362 if (FPClassTest InvertedFPCheck =
9363 invertFPClassTestIfSimpler(FPTestMask, true)) {
9364 FPTestMask = InvertedFPCheck;
9365 IsInvertedFP = true;
9366 }
9367
9368 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9369 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9370
9371 // See if we can fold an | fcNan into an unordered compare.
9372 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9373
9374 // Can't fold the ordered check if we're only testing for snan or qnan
9375 // individually.
9376 if ((FPTestMask & fcNan) != fcNan)
9377 OrderedFPTestMask = FPTestMask;
9378
9379 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9380
9381 if (std::optional<bool> IsCmp0 =
9382 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
9383 IsCmp0 && (isCondCodeLegalOrCustom(
9384 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9385 OperandVT.getScalarType().getSimpleVT()))) {
9386
9387 // If denormals could be implicitly treated as 0, this is not equivalent
9388 // to a compare with 0 since it will also be true for denormals.
9389 return DAG.getSetCC(DL, ResultVT, Op,
9390 DAG.getConstantFP(0.0, DL, OperandVT),
9391 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9392 }
9393
// isnan(x) via self-compare: only NaN compares unordered with itself.
// NOTE(review): original line 9395 (the isCondCodeLegalOrCustom call head)
// is absent from this listing.
9394 if (FPTestMask == fcNan &&
9396 OperandVT.getScalarType().getSimpleVT()))
9397 return DAG.getSetCC(DL, ResultVT, Op, Op,
9398 IsInvertedFP ? ISD::SETO : ISD::SETUO);
9399
9400 bool IsOrderedInf = FPTestMask == fcInf;
9401 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9402 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
9403 : UnorderedCmpOpcode,
9404 OperandVT.getScalarType().getSimpleVT()) &&
// NOTE(review): original lines 9405-9406 and 9408 (fabs-availability checks
// closing this condition) are absent from this listing.
9407 (OperandVT.isVector() &&
9409 // isinf(x) --> fabs(x) == inf
9410 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9411 SDValue Inf =
9412 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9413 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
9414 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9415 }
9416
9417 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9418 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
9419 : UnorderedCmpOpcode,
9420 OperandVT.getSimpleVT())) {
9421 // isposinf(x) --> x == inf
9422 // isneginf(x) --> x == -inf
9423 // isposinf(x) || nan --> x u== inf
9424 // isneginf(x) || nan --> x u== -inf
9425
9426 SDValue Inf = DAG.getConstantFP(
9427 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9428 OperandVT);
9429 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9430 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9431 }
9432
9433 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9434 // TODO: Could handle ordered case, but it produces worse code for
9435 // x86. Maybe handle ordered if fabs is free?
9436
9437 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9438 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9439
9440 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9441 OperandVT.getScalarType().getSimpleVT())) {
9442 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9443
9444 // TODO: Maybe only makes sense if fabs is free. Integer test of
9445 // exponent bits seems better for x86.
9446 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9447 SDValue SmallestNormal = DAG.getConstantFP(
9448 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9449 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9450 IsOrdered ? OrderedOp : UnorderedOp);
9451 }
9452 }
9453
9454 if (FPTestMask == fcNormal) {
9455 // TODO: Handle unordered
9456 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9457 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9458
9459 if (isCondCodeLegalOrCustom(IsFiniteOp,
9460 OperandVT.getScalarType().getSimpleVT()) &&
9461 isCondCodeLegalOrCustom(IsNormalOp,
9462 OperandVT.getScalarType().getSimpleVT()) &&
9463 isFAbsFree(OperandVT)) {
9464 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9465 SDValue Inf =
9466 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9467 SDValue SmallestNormal = DAG.getConstantFP(
9468 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9469
9470 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9471 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9472 SDValue IsNormal =
9473 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9474 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9475 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9476 }
9477 }
9478 }
9479
9480 // Some checks may be represented as inversion of simpler check, for example
9481 // "inf|normal|subnormal|zero" => !"nan".
9482 bool IsInverted = false;
9483
9484 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9485 Test = InvertedCheck;
9486 IsInverted = true;
9487 }
9488
9489 // In the general case use integer operations.
9490 unsigned BitSize = OperandVT.getScalarSizeInBits();
9491 EVT IntVT = OperandVT.changeElementType(
9492 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
9493 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9494
9495 // Various masks.
9496 APInt SignBit = APInt::getSignMask(BitSize);
9497 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9498 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9499 const unsigned ExplicitIntBitInF80 = 63;
9500 APInt ExpMask = Inf;
9501 if (IsF80)
9502 ExpMask.clearBit(ExplicitIntBitInF80);
9503 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9504 APInt QNaNBitMask =
9505 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9506 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9507
9508 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9509 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9510 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9511 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9512 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9513 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9514
9515 SDValue Res;
// OR each per-class partial test into the accumulated result.
9516 const auto appendResult = [&](SDValue PartialRes) {
9517 if (PartialRes) {
9518 if (Res)
9519 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9520 else
9521 Res = PartialRes;
9522 }
9523 };
9524
9525 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
9526 const auto getIntBitIsSet = [&]() -> SDValue {
9527 if (!IntBitIsSetV) {
9528 APInt IntBitMask(BitSize, 0);
9529 IntBitMask.setBit(ExplicitIntBitInF80);
9530 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9531 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9532 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9533 }
9534 return IntBitIsSetV;
9535 };
9536
9537 // Split the value into sign bit and absolute value.
9538 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9539 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9540 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9541
9542 // Tests that involve more than one class should be processed first.
9543 SDValue PartialRes;
9544
9545 if (IsF80)
9546 ; // Detect finite numbers of f80 by checking individual classes because
9547 // they have different settings of the explicit integer bit.
9548 else if ((Test & fcFinite) == fcFinite) {
9549 // finite(V) ==> abs(V) < exp_mask
9550 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9551 Test &= ~fcFinite;
9552 } else if ((Test & fcFinite) == fcPosFinite) {
9553 // finite(V) && V > 0 ==> V < exp_mask
9554 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9555 Test &= ~fcPosFinite;
9556 } else if ((Test & fcFinite) == fcNegFinite) {
9557 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9558 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9559 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9560 Test &= ~fcNegFinite;
9561 }
9562 appendResult(PartialRes);
9563
9564 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9565 // fcZero | fcSubnormal => test all exponent bits are 0
9566 // TODO: Handle sign bit specific cases
9567 if (PartialCheck == (fcZero | fcSubnormal)) {
9568 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9569 SDValue ExpIsZero =
9570 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9571 appendResult(ExpIsZero);
9572 Test &= ~PartialCheck & fcAllFlags;
9573 }
9574 }
9575
9576 // Check for individual classes.
9577
9578 if (unsigned PartialCheck = Test & fcZero) {
9579 if (PartialCheck == fcPosZero)
9580 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ)
9581 else if (PartialCheck == fcZero)
9582 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
9583 else // ISD::fcNegZero
9584 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
9585 appendResult(PartialRes);
9586 }
9587
9588 if (unsigned PartialCheck = Test & fcSubnormal) {
9589 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
9590 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
9591 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
9592 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
9593 SDValue VMinusOneV =
9594 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
9595 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
9596 if (PartialCheck == fcNegSubnormal)
9597 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9598 appendResult(PartialRes);
9599 }
9600
9601 if (unsigned PartialCheck = Test & fcInf) {
9602 if (PartialCheck == fcPosInf)
9603 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
9604 else if (PartialCheck == fcInf)
9605 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
9606 else { // ISD::fcNegInf
9607 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9608 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
9609 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
9610 }
9611 appendResult(PartialRes);
9612 }
9613
9614 if (unsigned PartialCheck = Test & fcNan) {
9615 APInt InfWithQnanBit = Inf | QNaNBitMask;
9616 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
9617 if (PartialCheck == fcNan) {
9618 // isnan(V) ==> abs(V) > int(inf)
9619 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9620 if (IsF80) {
9621 // Recognize unsupported values as NaNs for compatibility with glibc.
9622 // In them (exp(V)==0) == int_bit.
9623 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
9624 SDValue ExpIsZero =
9625 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9626 SDValue IsPseudo =
9627 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
9628 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
9629 }
9630 } else if (PartialCheck == fcQNan) {
9631 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
9632 PartialRes =
9633 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
9634 } else { // ISD::fcSNan
9635 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
9636 // abs(V) < (unsigned(Inf) | quiet_bit)
9637 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
9638 SDValue IsNotQnan =
9639 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
9640 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
9641 }
9642 appendResult(PartialRes);
9643 }
9644
9645 if (unsigned PartialCheck = Test & fcNormal) {
9646 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
9647 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9648 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
9649 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
9650 APInt ExpLimit = ExpMask - ExpLSB;
9651 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
9652 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
9653 if (PartialCheck == fcNegNormal)
9654 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9655 else if (PartialCheck == fcPosNormal) {
9656 SDValue PosSignV =
9657 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
9658 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
9659 }
9660 if (IsF80)
9661 PartialRes =
9662 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
9663 appendResult(PartialRes);
9664 }
9665
9666 if (!Res)
9667 return DAG.getConstant(IsInverted, DL, ResultVT);
9668 if (IsInverted)
9669 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
9670 return Res;
9671}
9672
9673// Only expand vector types if we have the appropriate vector bit operations.
// Helper for the CTPOP expansions below: checks the bit-parallel popcount
// sequence (add/and/srl, plus mul or shl depending on width) is available
// for this vector type.
9674static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
9675 assert(VT.isVector() && "Expected vector type");
9676 unsigned Len = VT.getScalarSizeInBits();
// NOTE(review): original lines 9678-9679 (additional legality conjuncts) and
// 9681 (final conjunct) are absent from this doxygen listing.
9677 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
9680 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
9682}
9683
// TargetLowering::expandCTPOP — NOTE(review): doxygen listing; the opening
// signature line (original 9684) and lines 9745-9746 (the byte-sum strategy
// condition) are missing from this dump.
//
// Expands CTPOP using the parallel bit-count ("best" method from the
// Stanford bit-twiddling hacks page): pairwise 0x55/0x33/0x0F reductions to
// per-byte counts, then a byte horizontal sum via multiply-by-0x0101.. (or a
// shift-add chain). Returns an empty SDValue when the type is unsupported.
9685 SDLoc dl(Node);
9686 EVT VT = Node->getValueType(0);
9687 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9688 SDValue Op = Node->getOperand(0);
9689 unsigned Len = VT.getScalarSizeInBits();
9690 assert(VT.isInteger() && "CTPOP not implemented for this type.");
9691
9692 // TODO: Add support for irregular type lengths.
9693 if (!(Len <= 128 && Len % 8 == 0))
9694 return SDValue();
9695
9696 // Only expand vector types if we have the appropriate vector bit operations.
9697 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
9698 return SDValue();
9699
9700 // This is the "best" algorithm from
9701 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9702 SDValue Mask55 =
9703 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9704 SDValue Mask33 =
9705 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9706 SDValue Mask0F =
9707 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9708
9709 // v = v - ((v >> 1) & 0x55555555...)
9710 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
9711 DAG.getNode(ISD::AND, dl, VT,
9712 DAG.getNode(ISD::SRL, dl, VT, Op,
9713 DAG.getConstant(1, dl, ShVT)),
9714 Mask55));
9715 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9716 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9717 DAG.getNode(ISD::AND, dl, VT,
9718 DAG.getNode(ISD::SRL, dl, VT, Op,
9719 DAG.getConstant(2, dl, ShVT)),
9720 Mask33));
9721 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9722 Op = DAG.getNode(ISD::AND, dl, VT,
9723 DAG.getNode(ISD::ADD, dl, VT, Op,
9724 DAG.getNode(ISD::SRL, dl, VT, Op,
9725 DAG.getConstant(4, dl, ShVT))),
9726 Mask0F);
9727
// For 8-bit types the per-byte count is already the answer.
9728 if (Len <= 8)
9729 return Op;
9730
9731 // Avoid the multiply if we only have 2 bytes to add.
9732 // TODO: Only doing this for scalars because vectors weren't as obviously
9733 // improved.
9734 if (Len == 16 && !VT.isVector()) {
9735 // v = (v + (v >> 8)) & 0x00FF;
9736 return DAG.getNode(ISD::AND, dl, VT,
9737 DAG.getNode(ISD::ADD, dl, VT, Op,
9738 DAG.getNode(ISD::SRL, dl, VT, Op,
9739 DAG.getConstant(8, dl, ShVT))),
9740 DAG.getConstant(0xFF, dl, VT));
9741 }
9742
9743 // v = (v * 0x01010101...) >> (Len - 8)
9744 SDValue V;
// NOTE(review): original lines 9745-9746 (the condition choosing between the
// multiply and the shift-add chain) are absent from this listing.
9747 SDValue Mask01 =
9748 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9749 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9750 } else {
// Multiply not available/cheap: accumulate byte sums with a shift-add chain.
9751 V = Op;
9752 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9753 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9754 V = DAG.getNode(ISD::ADD, dl, VT, V,
9755 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9756 }
9757 }
9758 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9759}
9760
// Expand ISD::VP_CTPOP -- the vector-predicated twin of expandCTPOP above.
// Identical parallel bit-count algorithm, but every DAG node carries the
// (Mask, VL) predication operands taken from the VP node.
// NOTE(review): the defining signature line (source line 9761) is absent
// from this listing -- doc-view extraction artifact.
9762 SDLoc dl(Node);
9763 EVT VT = Node->getValueType(0);
9764 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9765 SDValue Op = Node->getOperand(0);
9766 SDValue Mask = Node->getOperand(1);
9767 SDValue VL = Node->getOperand(2);
9768 unsigned Len = VT.getScalarSizeInBits();
9769 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9770
9771 // TODO: Add support for irregular type lengths.
9772 if (!(Len <= 128 && Len % 8 == 0))
9773 return SDValue();
9774
9775 // This is same algorithm of expandCTPOP from
9776 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9777 SDValue Mask55 =
9778 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9779 SDValue Mask33 =
9780 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9781 SDValue Mask0F =
9782 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9783
9784 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9785
9786 // v = v - ((v >> 1) & 0x55555555...)
9787 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9788 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9789 DAG.getConstant(1, dl, ShVT), Mask, VL),
9790 Mask55, Mask, VL);
9791 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9792
9793 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9794 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9795 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9796 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9797 DAG.getConstant(2, dl, ShVT), Mask, VL),
9798 Mask33, Mask, VL);
9799 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9800
9801 // v = (v + (v >> 4)) & 0x0F0F0F0F...
// NOTE(review): the `,` ending the Tmp4 statement below is a comma operator
// joining the Tmp4/Tmp5 assignments -- behaviorally the same as `;` but
// almost certainly a typo worth cleaning up upstream.
9802 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9803 Mask, VL),
9804 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9805 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9806
9807 if (Len <= 8)
9808 return Op;
9809
9810 // v = (v * 0x01010101...) >> (Len - 8)
9811 SDValue V;
// NOTE(review): the listing drops line 9812 -- the start of the legality
// check whose continuation (`ISD::VP_MUL, getTypeToTransformTo(...)`) is
// visible below; confirm upstream.
9813 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9814 SDValue Mask01 =
9815 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9816 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9817 } else {
// No VP multiply: accumulate the byte counts with a shift-add ladder.
9818 V = Op;
9819 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9820 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9821 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9822 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9823 Mask, VL);
9824 }
9825 }
// Total is in the top byte; shift it down to bit 0.
9826 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9827 Mask, VL);
9828}
9829
// Expand ISD::CTLZ / ISD::CTLZ_ZERO_UNDEF. Strategy, in order of preference:
// (1) forward CTLZ_ZERO_UNDEF to a supported plain CTLZ; (2) use a supported
// CTLZ_ZERO_UNDEF plus a setcc/select to patch the zero-input case; (3) fall
// back to Hacker's Delight: smear the leading one bit rightwards, invert,
// and popcount the result.
// NOTE(review): this listing drops several source lines (9830 -- the
// function signature; 9839, 9843, 9856-9859 -- legality conditions guarding
// the branches below). Confirm the exact guards against the upstream file.
9831 SDLoc dl(Node);
9832 EVT VT = Node->getValueType(0);
9833 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9834 SDValue Op = Node->getOperand(0);
9835 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9836
9837 // If the non-ZERO_UNDEF version is supported we can use that instead.
9838 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9840 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9841
9842 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9844 EVT SetCCVT =
9845 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9846 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9847 SDValue Zero = DAG.getConstant(0, dl, VT);
9848 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// Zero input: result is the element bit width, per the CTLZ contract.
9849 return DAG.getSelect(dl, VT, SrcIsZero,
9850 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9851 }
9852
9853 // Only expand vector types if we have the appropriate vector bit operations.
9854 // This includes the operations needed to expand CTPOP if it isn't supported.
9855 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9857 !canExpandVectorCTPOP(*this, VT)) ||
9860 return SDValue();
9861
9862 // for now, we do this:
9863 // x = x | (x >> 1);
9864 // x = x | (x >> 2);
9865 // ...
9866 // x = x | (x >>16);
9867 // x = x | (x >>32); // for 64-bit input
9868 // return popcount(~x);
9869 //
9870 // Ref: "Hacker's Delight" by Henry Warren
9871 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9872 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9873 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9874 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9875 }
9876 Op = DAG.getNOT(dl, Op, VT);
9877 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9878}
9879
// Expand ISD::VP_CTLZ* -- vector-predicated version of the Hacker's Delight
// CTLZ fallback above: smear the leading one bit down with OR/SRL steps,
// invert (via XOR with all-ones), then VP_CTPOP the result. Every node
// carries the (Mask, VL) predication operands.
// NOTE(review): the defining signature line (source line 9880) is absent
// from this listing -- doc-view extraction artifact.
9881 SDLoc dl(Node);
9882 EVT VT = Node->getValueType(0);
9883 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9884 SDValue Op = Node->getOperand(0);
9885 SDValue Mask = Node->getOperand(1);
9886 SDValue VL = Node->getOperand(2);
9887 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9888
9889 // do this:
9890 // x = x | (x >> 1);
9891 // x = x | (x >> 2);
9892 // ...
9893 // x = x | (x >>16);
9894 // x = x | (x >>32); // for 64-bit input
9895 // return popcount(~x);
9896 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9897 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9898 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9899 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9900 VL);
9901 }
// ~x expressed as a predicated XOR with all-ones (there is no VP NOT helper).
9902 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9903 Mask, VL);
9904 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9905}
9906
// Expand a count-leading-sign-bits style node (CTLS) as
// CTLZ_ZERO_UNDEF(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1)): XOR with the
// broadcast sign bit turns redundant sign bits into leading zeros, and the
// SHL/OR-with-1 guarantees a set bit so the ZERO_UNDEF form is safe.
// The operand is frozen because it is used twice (directly and via SRA).
// NOTE(review): the defining signature line (source line 9907) is absent
// from this listing -- confirm the exact function name/opcode upstream.
9908 SDLoc dl(Node);
9909 EVT VT = Node->getValueType(0);
9910 SDValue Op = DAG.getFreeze(Node->getOperand(0));
9911 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9912
9913 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
9914 // This transforms the sign bits into leading zeros that can be counted.
9915 SDValue ShiftAmt = DAG.getShiftAmountConstant(NumBitsPerElt - 1, VT, dl);
9916 SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, Op, ShiftAmt);
9917 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, SignBit);
9918 SDValue Shl =
9919 DAG.getNode(ISD::SHL, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9920 SDValue Or = DAG.getNode(ISD::OR, dl, VT, Shl, DAG.getConstant(1, dl, VT));
9921 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Or);
9922}
9923
// CTTZTableLookup: lower CTTZ/CTTZ_ZERO_UNDEF for 32/64-bit types via a
// De Bruijn multiplication. Isolate the lowest set bit with (x & -x),
// multiply by the De Bruijn constant, use the top log2(BitWidth) bits to
// index a byte table in the constant pool, and zext-load the answer.
// For plain CTTZ, a setcc/select patches the zero-input case to BitWidth.
// NOTE(review): the first half of the signature (source line 9924, name
// `TargetLowering::CTTZTableLookup` per the caller in expandCTTZ) and
// lines 9931/9937/9945/9947 (a profitability guard, the PtrInfo
// initializer's continuation, and the table declaration) are absent from
// this listing -- doc-view extraction artifact; confirm upstream.
9925 const SDLoc &DL, EVT VT, SDValue Op,
9926 unsigned BitWidth) const {
9927 if (BitWidth != 32 && BitWidth != 64)
9928 return SDValue();
9929
9930 const DataLayout &TD = DAG.getDataLayout();
9932 return SDValue();
9933
// Canonical De Bruijn sequences for 32 and 64 bits.
9934 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9935 : APInt(64, 0x0218A392CD3D5DBFULL);
9936 MachinePointerInfo PtrInfo =
9938 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
// (x & -x) isolates the lowest set bit before the De Bruijn multiply.
9939 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9940 SDValue Lookup = DAG.getNode(
9941 ISD::SRL, DL, VT,
9942 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9943 DAG.getConstant(DeBruijn, DL, VT)),
9944 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
9946
// Build the inverse table: for each bit index i, the top bits of
// (DeBruijn << i) give the table slot that should hold i.
9948 for (unsigned i = 0; i < BitWidth; i++) {
9949 APInt Shl = DeBruijn.shl(i);
9950 APInt Lshr = Shl.lshr(ShiftAmt);
9951 Table[Lshr.getZExtValue()] = i;
9952 }
9953
9954 // Create a ConstantArray in Constant Pool
9955 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9956 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9957 TD.getPrefTypeAlign(CA->getType()));
9958 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9959 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9960 PtrInfo, MVT::i8);
9961 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9962 return ExtLoad;
9963
// Plain CTTZ must return BitWidth for a zero input; patch with a select.
9964 EVT SetCCVT =
9965 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9966 SDValue Zero = DAG.getConstant(0, DL, VT);
9967 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9968 return DAG.getSelect(DL, VT, SrcIsZero,
9969 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9970}
9971
// Expand ISD::CTTZ / ISD::CTTZ_ZERO_UNDEF. Strategy, in order: (1) forward
// CTTZ_ZERO_UNDEF to a supported plain CTTZ; (2) use a supported
// CTTZ_ZERO_UNDEF plus setcc/select for the zero case; (3) De Bruijn table
// lookup (CTTZTableLookup) when CTPOP would be expanded/libcalled anyway;
// (4) Hacker's Delight fallback popcount(~x & (x-1)), or BitWidth - ctlz of
// that value when CTLZ is legal but CTPOP is not.
// NOTE(review): this listing drops several source lines (9972 -- the
// signature; 9980, 9984, 9997-10002, 10007-10008, 10021 -- the legality
// guards for each branch). Confirm the exact guards against upstream.
9973 SDLoc dl(Node);
9974 EVT VT = Node->getValueType(0);
9975 SDValue Op = Node->getOperand(0);
9976 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9977
9978 // If the non-ZERO_UNDEF version is supported we can use that instead.
9979 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9981 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9982
9983 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9985 EVT SetCCVT =
9986 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9987 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9988 SDValue Zero = DAG.getConstant(0, dl, VT);
9989 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9990 return DAG.getSelect(dl, VT, SrcIsZero,
9991 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9992 }
9993
9994 // Only expand vector types if we have the appropriate vector bit operations.
9995 // This includes the operations needed to expand CTPOP if it isn't supported.
9996 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9999 !canExpandVectorCTPOP(*this, VT)) ||
10003 return SDValue();
10004
10005 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
10006 // to be expanded or converted to a libcall.
10009 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
10010 return V;
10011
10012 // for now, we use: { return popcount(~x & (x - 1)); }
10013 // unless the target has ctlz but not ctpop, in which case we use:
10014 // { return 32 - nlz(~x & (x-1)); }
10015 // Ref: "Hacker's Delight" by Henry Warren
// ~x & (x-1) sets exactly the bits below the lowest set bit of x.
10016 SDValue Tmp = DAG.getNode(
10017 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
10018 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
10019
10020 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
10022 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
10023 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
10024 }
10025
10026 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
10027}
10028
// Expand ISD::VP_CTTZ* -- vector-predicated twin of the expandCTTZ fallback:
// VP_CTPOP(~x & (x - 1)), with (Mask, VL) predication on every node and the
// NOT expressed as a predicated XOR with all-ones.
// NOTE(review): the defining signature line (source line 10029) is absent
// from this listing -- doc-view extraction artifact.
10030 SDValue Op = Node->getOperand(0);
10031 SDValue Mask = Node->getOperand(1);
10032 SDValue VL = Node->getOperand(2);
10033 SDLoc dl(Node);
10034 EVT VT = Node->getValueType(0);
10035
10036 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
10037 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
10038 DAG.getAllOnesConstant(dl, VT), Mask, VL);
10039 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
10040 DAG.getConstant(1, dl, VT), Mask, VL);
10041 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
10042 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
10043}
10044
// Expand a VP cttz.elts-style node: count trailing zero *elements* of a
// vector by converting to a boolean vector, selecting the lane index
// (step_vector) for active lanes and EVL for inactive ones, then taking a
// predicated unsigned-min reduction -- the first active lane index wins, or
// EVL if no lane is active.
// NOTE(review): the first half of the signature (source line 10045) is
// absent from this listing -- doc-view extraction artifact.
10046 SelectionDAG &DAG) const {
10047 // %cond = to_bool_vec %source
10048 // %splat = splat /*val=*/VL
10049 // %tz = step_vector
10050 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
10051 // %r = vp.reduce.umin %v
10052 SDLoc DL(N);
10053 SDValue Source = N->getOperand(0);
10054 SDValue Mask = N->getOperand(1);
10055 SDValue EVL = N->getOperand(2);
10056 EVT SrcVT = Source.getValueType();
10057 EVT ResVT = N->getValueType(0);
10058 EVT ResVecVT =
10059 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
10060
10061 // Convert to boolean vector.
10062 if (SrcVT.getScalarType() != MVT::i1) {
10063 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
10064 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
10065 SrcVT.getVectorElementCount());
10066 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
10067 DAG.getCondCode(ISD::SETNE), Mask, EVL);
10068 }
10069
// EVL acts as the "not found" sentinel; it also seeds the umin reduction.
10070 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
10071 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
10072 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
10073 SDValue Select =
10074 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
10075 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
10076}
10077
// Expand a find-last-active-lane style node: given a mask vector, return the
// index of the highest set lane. Builds a step vector of a width chosen via
// getBitWidthForCttzElements (clamped so it is not narrower than the mask
// element type), zeroes inactive lanes, and takes VECREDUCE_UMAX. Handles
// integer promotion and vector widening of the step-vector type explicitly,
// because LegalizeVectorOps' promotion rules do not fit this shape.
// NOTE(review): the first half of the signature (source line 10078) is
// absent from this listing, so the function name here is inferred from the
// body -- confirm against the upstream file.
10079 SelectionDAG &DAG) const {
10080 SDLoc DL(N);
10081 SDValue Mask = N->getOperand(0);
10082 EVT MaskVT = Mask.getValueType();
10083 EVT BoolVT = MaskVT.getScalarType();
10084
10085 // Find a suitable type for a stepvector.
10086 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
10087 if (MaskVT.isScalableVector())
10088 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
10089 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10090 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
10091 BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
10092 /*ZeroIsPoison=*/true, &VScaleRange);
10093 // If the step vector element type is smaller than the mask element type,
10094 // use the mask type directly to avoid widening issues.
10095 EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
10096 EVT StepVT = MVT::getIntegerVT(EltWidth);
10097 EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
10098
10099 // If promotion or widening is required to make the type legal, do it here.
10100 // Promotion of integers within LegalizeVectorOps is looking for types of
10101 // the same size but with a smaller number of larger elements, not the usual
10102 // larger size with the same number of larger elements.
10104 TLI.getTypeAction(StepVecVT.getSimpleVT());
10105 SDValue StepVec;
10106 if (TypeAction == TargetLowering::TypePromoteInteger) {
10107 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10108 StepVT = StepVecVT.getVectorElementType();
10109 StepVec = DAG.getStepVector(DL, StepVecVT);
10110 } else if (TypeAction == TargetLowering::TypeWidenVector) {
10111 // For widening, the element count changes. Create a step vector with only
10112 // the original elements valid and zeros for padding. Also widen the mask.
10113 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10114 unsigned WideNumElts = WideVecVT.getVectorNumElements();
10115
10116 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
10117 SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
10118 SDValue UndefStep = DAG.getPOISON(WideVecVT);
10119 StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
10120
10121 // Widen mask: pad with zeros.
10122 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
10123 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
10124 Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
10125
10126 StepVecVT = WideVecVT;
10127 StepVT = WideVecVT.getVectorElementType();
10128 } else {
10129 StepVec = DAG.getStepVector(DL, StepVecVT);
10130 }
10131
10132 // Zero out lanes with inactive elements, then find the highest remaining
10133 // value from the stepvector.
10134 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
10135 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
10136 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
10137 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
10138}
10139
// Expand ISD::ABS (or its negation when IsNegative): prefer smax/umin/smin
// forms when those ops are legal, otherwise fall back to the classic
// sign-mask sequence Y = sra(X, BW-1); abs = sub(xor(X, Y), Y) (and the
// mirrored sub(Y, xor(X, Y)) for the negated form). The operand is frozen
// wherever it feeds two uses.
// NOTE(review): this listing drops source lines 10140 (the signature) and
// 10148/10157/10166/10175/10178 (the legality conjuncts of each guard) --
// confirm the exact conditions against the upstream file.
10141 bool IsNegative) const {
10142 SDLoc dl(N);
10143 EVT VT = N->getValueType(0);
10144 SDValue Op = N->getOperand(0);
10145
10146 // abs(x) -> smax(x,sub(0,x))
10147 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
10149 SDValue Zero = DAG.getConstant(0, dl, VT);
10150 Op = DAG.getFreeze(Op);
10151 return DAG.getNode(ISD::SMAX, dl, VT, Op,
10152 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10153 }
10154
10155 // abs(x) -> umin(x,sub(0,x))
10156 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
10158 SDValue Zero = DAG.getConstant(0, dl, VT);
10159 Op = DAG.getFreeze(Op);
10160 return DAG.getNode(ISD::UMIN, dl, VT, Op,
10161 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10162 }
10163
10164 // 0 - abs(x) -> smin(x, sub(0,x))
10165 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
10167 SDValue Zero = DAG.getConstant(0, dl, VT);
10168 Op = DAG.getFreeze(Op);
10169 return DAG.getNode(ISD::SMIN, dl, VT, Op,
10170 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10171 }
10172
10173 // Only expand vector types if we have the appropriate vector operations.
10174 if (VT.isVector() &&
10176 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
10177 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
10179 return SDValue();
10180
10181 Op = DAG.getFreeze(Op);
// Shift = all-ones if Op is negative, all-zeros otherwise (arithmetic
// shift by BW-1 broadcasts the sign bit).
10182 SDValue Shift = DAG.getNode(
10183 ISD::SRA, dl, VT, Op,
10184 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10185 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
10186
10187 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
10188 if (!IsNegative)
10189 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
10190
10191 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
10192 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
10193}
10194
// Expand ISD::ABDS / ISD::ABDU (absolute difference). Tries, in order:
// sub(max, min) when min/max are legal; or-of-usubsats for unsigned;
// abs(sub) when the subtraction provably cannot overflow; a branchless
// cmp/xor/sub form when setcc produces all-ones booleans; a sign-extended
// USUBO-overflow form for illegal scalar types; vector unroll as a last
// structured resort; and finally select(cmp, sub(l,r), sub(r,l)).
// NOTE(review): this listing drops source lines 10195 (the signature),
// 10234-10235 (the condition-code selection feeding `CC`), 10254 and 10264
// (guards for the USUBO and unroll paths) -- confirm against upstream.
10196 SDLoc dl(N);
10197 EVT VT = N->getValueType(0);
10198 SDValue LHS = N->getOperand(0);
10199 SDValue RHS = N->getOperand(1);
10200 bool IsSigned = N->getOpcode() == ISD::ABDS;
10201
10202 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
10203 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
10204 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
10205 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
10206 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
10207 LHS = DAG.getFreeze(LHS);
10208 RHS = DAG.getFreeze(RHS);
10209 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
10210 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
10211 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
10212 }
10213
10214 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
// One of the two saturating subs is zero, so OR yields the difference.
10215 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
10216 LHS = DAG.getFreeze(LHS);
10217 RHS = DAG.getFreeze(RHS);
10218 return DAG.getNode(ISD::OR, dl, VT,
10219 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
10220 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
10221 }
10222
10223 // If the subtract doesn't overflow then just use abs(sub())
10224 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
10225
10226 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
10227 return DAG.getNode(ISD::ABS, dl, VT,
10228 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
10229
10230 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
10231 return DAG.getNode(ISD::ABS, dl, VT,
10232 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10233
10234 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10236 LHS = DAG.getFreeze(LHS);
10237 RHS = DAG.getFreeze(RHS);
10238 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
10239
10240 // Branchless expansion iff cmp result is allbits:
10241 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
10242 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
10243 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10244 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
10245 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
10246 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
10247 }
10248
10249 // Similar to the branchless expansion, if we don't prefer selects, use the
10250 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
10251 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
10252 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
10253 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
10255 SDValue USubO =
10256 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
10257 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
10258 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
10259 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
10260 }
10261
10262 // FIXME: Should really try to split the vector in case it's legal on a
10263 // subvector.
10265 return DAG.UnrollVectorOp(N);
10266
10267 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10268 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10269 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
10270 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10271}
10272
// Expand the four averaging nodes AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU.
// Tries, in order: add+shift when the operands are already known extended
// (spare sign/zero bit); widen-add-shift-truncate for scalars with a legal
// double-width type; a UADDO-based form for illegal scalar AVGFLOORU; and
// the generic overflow-free identity
//   floor: add(and(l,r), shr(xor(l,r), 1)),  ceil: sub(or(l,r), shr(...)).
// NOTE(review): this listing drops source lines 10273 (the signature),
// 10286 (the first half of the "Unknown AVG node" assert condition) and
// 10324-10325 (part of the guard on the UADDO path) -- confirm upstream.
10274 SDLoc dl(N);
10275 EVT VT = N->getValueType(0);
10276 SDValue LHS = N->getOperand(0);
10277 SDValue RHS = N->getOperand(1);
10278
10279 unsigned Opc = N->getOpcode();
10280 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
10281 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
// Floor uses add(and,...), ceil uses sub(or,...); shift kind follows
// signedness.
10282 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
10283 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
10284 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
10285 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10287 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
10288 "Unknown AVG node");
10289
10290 // If the operands are already extended, we can add+shift.
// A spare sign bit (signed) or leading zero (unsigned) guarantees the add
// cannot overflow, so the plain add+shift is exact.
10291 bool IsExt =
10292 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
10293 DAG.ComputeNumSignBits(RHS) >= 2) ||
10294 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
10295 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
10296 if (IsExt) {
10297 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
10298 if (!IsFloor)
10299 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
10300 return DAG.getNode(ShiftOpc, dl, VT, Sum,
10301 DAG.getShiftAmountConstant(1, VT, dl));
10302 }
10303
10304 // For scalars, see if we can efficiently extend/truncate to use add+shift.
10305 if (VT.isScalarInteger()) {
10306 unsigned BW = VT.getScalarSizeInBits();
10307 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
10308 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
10309 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
10310 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
10311 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
10312 if (!IsFloor)
10313 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
10314 DAG.getConstant(1, dl, ExtVT));
10315 // Just use SRL as we will be truncating away the extended sign bits.
10316 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
10317 DAG.getShiftAmountConstant(1, ExtVT, dl));
10318 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
10319 }
10320 }
10321
10322 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
10323 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
10326 SDValue UAddWithOverflow =
10327 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
10328
10329 SDValue Sum = UAddWithOverflow.getValue(0);
10330 SDValue Overflow = UAddWithOverflow.getValue(1);
10331
10332 // Right shift the sum by 1
10333 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
10334 DAG.getShiftAmountConstant(1, VT, dl));
// The carried-out bit is the true MSB of the 2N-bit sum; reinsert it at the
// top after the shift.
10336 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
10337 SDValue OverflowShl = DAG.getNode(
10338 ISD::SHL, dl, VT, ZeroExtOverflow,
10339 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10340
10341 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
10342 }
10343
10344 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10345 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10346 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10347 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
10348 LHS = DAG.getFreeze(LHS);
10349 RHS = DAG.getFreeze(RHS);
10350 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
10351 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10352 SDValue Shift =
10353 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10354 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
10355}
10356
// Expand ISD::BSWAP for i16/i32/i64 scalar element types. i16 becomes a
// rotate by 8; i32 uses either a two-ROTR form (ARM-style targets) or the
// classic 4-piece shift/mask/OR tree; i64 uses the 8-piece tree. Other
// element widths return SDValue() (no expansion).
// NOTE(review): this listing drops source lines 10357 (the signature) and
// 10375 (the guard opening the ROTR-based i32 branch) -- confirm upstream.
10358 SDLoc dl(N);
10359 EVT VT = N->getValueType(0);
10360 SDValue Op = N->getOperand(0);
10361
10362 if (!VT.isSimple())
10363 return SDValue();
10364
10365 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10366 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10367 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10368 default:
10369 return SDValue();
10370 case MVT::i16:
10371 // Use a rotate by 8. This can be further expanded if necessary.
10372 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10373 case MVT::i32:
10374 // This is meant for ARM speficially, which has ROTR but no ROTL.
10376 SDValue Mask = DAG.getConstant(0x00FF00FF, dl, VT);
10377 // (x & 0x00FF00FF) rotr 8 | (x rotl 8) & 0x00FF00FF
10378 SDValue And = DAG.getNode(ISD::AND, dl, VT, Op, Mask);
10379 SDValue Rotr =
10380 DAG.getNode(ISD::ROTR, dl, VT, And, DAG.getConstant(8, dl, SHVT));
// rotl 8 is expressed as rotr 24 since this path targets ROTR-only ISAs.
10381 SDValue Rotl =
10382 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10383 SDValue And2 = DAG.getNode(ISD::AND, dl, VT, Rotl, Mask);
10384 return DAG.getNode(ISD::OR, dl, VT, Rotr, And2);
10385 }
// Generic i32 path: move each byte to its mirrored position and OR.
10386 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10387 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
10388 DAG.getConstant(0xFF00, dl, VT));
10389 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
10390 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10391 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
10392 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10393 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10394 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10395 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10396 case MVT::i64:
// i64: eight byte lanes, each masked/shifted into place then OR-reduced.
10397 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10398 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
10399 DAG.getConstant(255ULL<<8, dl, VT));
10400 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
10401 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
10402 DAG.getConstant(255ULL<<16, dl, VT));
10403 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
10404 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
10405 DAG.getConstant(255ULL<<24, dl, VT));
10406 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
10407 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10408 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
10409 DAG.getConstant(255ULL<<24, dl, VT));
10410 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10411 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
10412 DAG.getConstant(255ULL<<16, dl, VT));
10413 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
10414 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
10415 DAG.getConstant(255ULL<<8, dl, VT));
10416 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10417 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
10418 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
10419 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10420 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10421 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
10422 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10423 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
10424 }
10425}
10426
// Expand ISD::VP_BSWAP for i16/i32/i64 element types -- the vector-
// predicated analogue of expandBSWAP's shift/mask/OR trees (there is no VP
// rotate, so i16 uses SHL/SRL/OR instead of a rotate). Every node carries
// the (Mask, EVL) predication operands.
// NOTE(review): the defining signature line (source line 10427) is absent
// from this listing -- doc-view extraction artifact.
10428 SDLoc dl(N);
10429 EVT VT = N->getValueType(0);
10430 SDValue Op = N->getOperand(0);
10431 SDValue Mask = N->getOperand(1);
10432 SDValue EVL = N->getOperand(2);
10433
10434 if (!VT.isSimple())
10435 return SDValue();
10436
10437 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10438 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10439 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10440 default:
10441 return SDValue();
10442 case MVT::i16:
10443 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10444 Mask, EVL);
10445 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10446 Mask, EVL);
10447 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10448 case MVT::i32:
10449 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10450 Mask, EVL);
10451 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10452 Mask, EVL);
10453 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10454 Mask, EVL);
10455 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10456 Mask, EVL);
10457 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10458 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10459 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10460 Mask, EVL);
10461 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10462 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10463 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10464 case MVT::i64:
10465 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10466 Mask, EVL);
10467 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10468 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10469 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10470 Mask, EVL);
10471 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10472 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10473 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
10474 Mask, EVL);
10475 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10476 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10477 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
10478 Mask, EVL);
10479 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10480 Mask, EVL);
10481 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
10482 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
10483 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10484 Mask, EVL);
10485 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
10486 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10487 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
10488 Mask, EVL);
10489 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10490 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10491 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10492 Mask, EVL);
10493 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
10494 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
10495 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10496 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10497 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
10498 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10499 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
10500 }
10501}
10502
10504 SDLoc dl(N);
10505 EVT VT = N->getValueType(0);
10506 SDValue Op = N->getOperand(0);
10507 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10508 unsigned Sz = VT.getScalarSizeInBits();
10509
10510 SDValue Tmp, Tmp2, Tmp3;
10511
10512 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10513 // and finally the i1 pairs.
10514 // TODO: We can easily support i4/i2 legal types if any target ever does.
10515 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10516 // Create the masks - repeating the pattern every byte.
10517 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10518 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10519 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10520
10521 // BSWAP if the type is wider than a single byte.
10522 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
10523
10524 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10525 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
10526 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
10527 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
10528 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
10529 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10530
10531 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10532 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
10533 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
10534 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
10535 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
10536 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10537
10538 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10539 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
10540 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
10541 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
10542 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
10543 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
10544 return Tmp;
10545 }
10546
10547 Tmp = DAG.getConstant(0, dl, VT);
10548 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
10549 if (I < J)
10550 Tmp2 =
10551 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
10552 else
10553 Tmp2 =
10554 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
10555
10556 APInt Shift = APInt::getOneBitSet(Sz, J);
10557 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
10558 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
10559 }
10560
10561 return Tmp;
10562}
10563
10565 assert(N->getOpcode() == ISD::VP_BITREVERSE);
10566
10567 SDLoc dl(N);
10568 EVT VT = N->getValueType(0);
10569 SDValue Op = N->getOperand(0);
10570 SDValue Mask = N->getOperand(1);
10571 SDValue EVL = N->getOperand(2);
10572 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10573 unsigned Sz = VT.getScalarSizeInBits();
10574
10575 SDValue Tmp, Tmp2, Tmp3;
10576
10577 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
10578 // and finally the i1 pairs.
10579 // TODO: We can easily support i4/i2 legal types if any target ever does.
10580 if (Sz >= 8 && isPowerOf2_32(Sz)) {
10581 // Create the masks - repeating the pattern every byte.
10582 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
10583 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
10584 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
10585
10586 // BSWAP if the type is wider than a single byte.
10587 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
10588
10589 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
10590 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
10591 Mask, EVL);
10592 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10593 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
10594 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
10595 Mask, EVL);
10596 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
10597 Mask, EVL);
10598 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10599
10600 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
10601 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
10602 Mask, EVL);
10603 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10604 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
10605 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
10606 Mask, EVL);
10607 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
10608 Mask, EVL);
10609 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10610
10611 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
10612 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
10613 Mask, EVL);
10614 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10615 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
10616 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
10617 Mask, EVL);
10618 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
10619 Mask, EVL);
10620 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
10621 return Tmp;
10622 }
10623 return SDValue();
10624}
10625
10626std::pair<SDValue, SDValue>
10628 SelectionDAG &DAG) const {
10629 SDLoc SL(LD);
10630 SDValue Chain = LD->getChain();
10631 SDValue BasePTR = LD->getBasePtr();
10632 EVT SrcVT = LD->getMemoryVT();
10633 EVT DstVT = LD->getValueType(0);
10634 ISD::LoadExtType ExtType = LD->getExtensionType();
10635
10636 if (SrcVT.isScalableVector())
10637 report_fatal_error("Cannot scalarize scalable vector loads");
10638
10639 unsigned NumElem = SrcVT.getVectorNumElements();
10640
10641 EVT SrcEltVT = SrcVT.getScalarType();
10642 EVT DstEltVT = DstVT.getScalarType();
10643
10644 // A vector must always be stored in memory as-is, i.e. without any padding
10645 // between the elements, since various code depend on it, e.g. in the
10646 // handling of a bitcast of a vector type to int, which may be done with a
10647 // vector store followed by an integer load. A vector that does not have
10648 // elements that are byte-sized must therefore be stored as an integer
10649 // built out of the extracted vector elements.
10650 if (!SrcEltVT.isByteSized()) {
10651 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
10652 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
10653
10654 unsigned NumSrcBits = SrcVT.getSizeInBits();
10655 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
10656
10657 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
10658 SDValue SrcEltBitMask = DAG.getConstant(
10659 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
10660
10661 // Load the whole vector and avoid masking off the top bits as it makes
10662 // the codegen worse.
10663 SDValue Load =
10664 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
10665 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
10666 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10667
10669 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10670 unsigned ShiftIntoIdx =
10671 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10672 SDValue ShiftAmount = DAG.getShiftAmountConstant(
10673 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
10674 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
10675 SDValue Elt =
10676 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
10677 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
10678
10679 if (ExtType != ISD::NON_EXTLOAD) {
10680 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
10681 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
10682 }
10683
10684 Vals.push_back(Scalar);
10685 }
10686
10687 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10688 return std::make_pair(Value, Load.getValue(1));
10689 }
10690
10691 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
10692 assert(SrcEltVT.isByteSized());
10693
10695 SmallVector<SDValue, 8> LoadChains;
10696
10697 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10698 SDValue ScalarLoad = DAG.getExtLoad(
10699 ExtType, SL, DstEltVT, Chain, BasePTR,
10700 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
10701 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10702
10703 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
10704
10705 Vals.push_back(ScalarLoad.getValue(0));
10706 LoadChains.push_back(ScalarLoad.getValue(1));
10707 }
10708
10709 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
10710 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
10711
10712 return std::make_pair(Value, NewChain);
10713}
10714
10716 SelectionDAG &DAG) const {
10717 SDLoc SL(ST);
10718
10719 SDValue Chain = ST->getChain();
10720 SDValue BasePtr = ST->getBasePtr();
10721 SDValue Value = ST->getValue();
10722 EVT StVT = ST->getMemoryVT();
10723
10724 if (StVT.isScalableVector())
10725 report_fatal_error("Cannot scalarize scalable vector stores");
10726
10727 // The type of the data we want to save
10728 EVT RegVT = Value.getValueType();
10729 EVT RegSclVT = RegVT.getScalarType();
10730
10731 // The type of data as saved in memory.
10732 EVT MemSclVT = StVT.getScalarType();
10733
10734 unsigned NumElem = StVT.getVectorNumElements();
10735
10736 // A vector must always be stored in memory as-is, i.e. without any padding
10737 // between the elements, since various code depend on it, e.g. in the
10738 // handling of a bitcast of a vector type to int, which may be done with a
10739 // vector store followed by an integer load. A vector that does not have
10740 // elements that are byte-sized must therefore be stored as an integer
10741 // built out of the extracted vector elements.
10742 if (!MemSclVT.isByteSized()) {
10743 unsigned NumBits = StVT.getSizeInBits();
10744 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
10745
10746 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
10747
10748 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10749 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10750 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
10751 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
10752 unsigned ShiftIntoIdx =
10753 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
10754 SDValue ShiftAmount =
10755 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
10756 SDValue ShiftedElt =
10757 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
10758 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
10759 }
10760
10761 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
10762 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10763 ST->getAAInfo());
10764 }
10765
10766 // Store Stride in bytes
10767 unsigned Stride = MemSclVT.getSizeInBits() / 8;
10768 assert(Stride && "Zero stride!");
10769 // Extract each of the elements from the original vector and save them into
10770 // memory individually.
10772 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
10773 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
10774
10775 SDValue Ptr =
10776 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
10777
10778 // This scalar TruncStore may be illegal, but we legalize it later.
10779 SDValue Store = DAG.getTruncStore(
10780 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
10781 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
10782 ST->getAAInfo());
10783
10784 Stores.push_back(Store);
10785 }
10786
10787 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
10788}
10789
10790std::pair<SDValue, SDValue>
10792 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
10793 "unaligned indexed loads not implemented!");
10794 SDValue Chain = LD->getChain();
10795 SDValue Ptr = LD->getBasePtr();
10796 EVT VT = LD->getValueType(0);
10797 EVT LoadedVT = LD->getMemoryVT();
10798 SDLoc dl(LD);
10799 auto &MF = DAG.getMachineFunction();
10800
10801 if (VT.isFloatingPoint() || VT.isVector()) {
10802 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
10803 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
10804 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
10805 LoadedVT.isVector()) {
10806 // Scalarize the load and let the individual components be handled.
10807 return scalarizeVectorLoad(LD, DAG);
10808 }
10809
10810 // Expand to a (misaligned) integer load of the same size,
10811 // then bitconvert to floating point or vector.
10812 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10813 LD->getMemOperand());
10814 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10815 if (LoadedVT != VT)
10816 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10817 ISD::ANY_EXTEND, dl, VT, Result);
10818
10819 return std::make_pair(Result, newLoad.getValue(1));
10820 }
10821
10822 // Copy the value to a (aligned) stack slot using (unaligned) integer
10823 // loads and stores, then do a (aligned) load from the stack slot.
10824 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10825 unsigned LoadedBytes = LoadedVT.getStoreSize();
10826 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10827 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10828
10829 // Make sure the stack slot is also aligned for the register type.
10830 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10831 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10833 SDValue StackPtr = StackBase;
10834 unsigned Offset = 0;
10835
10836 EVT PtrVT = Ptr.getValueType();
10837 EVT StackPtrVT = StackPtr.getValueType();
10838
10839 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10840 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10841
10842 // Do all but one copies using the full register width.
10843 for (unsigned i = 1; i < NumRegs; i++) {
10844 // Load one integer register's worth from the original location.
10845 SDValue Load = DAG.getLoad(
10846 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10847 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
10848 // Follow the load with a store to the stack slot. Remember the store.
10849 Stores.push_back(DAG.getStore(
10850 Load.getValue(1), dl, Load, StackPtr,
10851 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10852 // Increment the pointers.
10853 Offset += RegBytes;
10854
10855 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10856 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10857 }
10858
10859 // The last copy may be partial. Do an extending load.
10860 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10861 8 * (LoadedBytes - Offset));
10862 SDValue Load = DAG.getExtLoad(
10863 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10864 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
10865 LD->getMemOperand()->getFlags(), LD->getAAInfo());
10866 // Follow the load with a store to the stack slot. Remember the store.
10867 // On big-endian machines this requires a truncating store to ensure
10868 // that the bits end up in the right place.
10869 Stores.push_back(DAG.getTruncStore(
10870 Load.getValue(1), dl, Load, StackPtr,
10871 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10872
10873 // The order of the stores doesn't matter - say it with a TokenFactor.
10874 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10875
10876 // Finally, perform the original load only redirected to the stack slot.
10877 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10878 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10879 LoadedVT);
10880
10881 // Callers expect a MERGE_VALUES node.
10882 return std::make_pair(Load, TF);
10883 }
10884
10885 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10886 "Unaligned load of unsupported type.");
10887
10888 // Compute the new VT that is half the size of the old one. This is an
10889 // integer MVT.
10890 unsigned NumBits = LoadedVT.getSizeInBits();
10891 EVT NewLoadedVT;
10892 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10893 NumBits >>= 1;
10894
10895 Align Alignment = LD->getBaseAlign();
10896 unsigned IncrementSize = NumBits / 8;
10897 ISD::LoadExtType HiExtType = LD->getExtensionType();
10898
10899 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10900 if (HiExtType == ISD::NON_EXTLOAD)
10901 HiExtType = ISD::ZEXTLOAD;
10902
10903 // Load the value in two parts
10904 SDValue Lo, Hi;
10905 if (DAG.getDataLayout().isLittleEndian()) {
10906 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10907 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10908 LD->getAAInfo());
10909
10910 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10911 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10912 LD->getPointerInfo().getWithOffset(IncrementSize),
10913 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10914 LD->getAAInfo());
10915 } else {
10916 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10917 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10918 LD->getAAInfo());
10919
10920 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10921 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10922 LD->getPointerInfo().getWithOffset(IncrementSize),
10923 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10924 LD->getAAInfo());
10925 }
10926
10927 // aggregate the two parts
10928 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10929 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10930 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10931
10932 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10933 Hi.getValue(1));
10934
10935 return std::make_pair(Result, TF);
10936}
10937
10939 SelectionDAG &DAG) const {
10940 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10941 "unaligned indexed stores not implemented!");
10942 SDValue Chain = ST->getChain();
10943 SDValue Ptr = ST->getBasePtr();
10944 SDValue Val = ST->getValue();
10945 EVT VT = Val.getValueType();
10946 Align Alignment = ST->getBaseAlign();
10947 auto &MF = DAG.getMachineFunction();
10948 EVT StoreMemVT = ST->getMemoryVT();
10949
10950 SDLoc dl(ST);
10951 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10952 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10953 if (isTypeLegal(intVT)) {
10954 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10955 StoreMemVT.isVector()) {
10956 // Scalarize the store and let the individual components be handled.
10957 SDValue Result = scalarizeVectorStore(ST, DAG);
10958 return Result;
10959 }
10960 // Expand to a bitconvert of the value to the integer type of the
10961 // same size, then a (misaligned) int store.
10962 // FIXME: Does not handle truncating floating point stores!
10963 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10964 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10965 Alignment, ST->getMemOperand()->getFlags());
10966 return Result;
10967 }
10968 // Do a (aligned) store to a stack slot, then copy from the stack slot
10969 // to the final destination using (unaligned) integer loads and stores.
10970 MVT RegVT = getRegisterType(
10971 *DAG.getContext(),
10972 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10973 EVT PtrVT = Ptr.getValueType();
10974 unsigned StoredBytes = StoreMemVT.getStoreSize();
10975 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10976 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10977
10978 // Make sure the stack slot is also aligned for the register type.
10979 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10980 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10981
10982 // Perform the original store, only redirected to the stack slot.
10983 SDValue Store = DAG.getTruncStore(
10984 Chain, dl, Val, StackPtr,
10985 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10986
10987 EVT StackPtrVT = StackPtr.getValueType();
10988
10989 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10990 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10992 unsigned Offset = 0;
10993
10994 // Do all but one copies using the full register width.
10995 for (unsigned i = 1; i < NumRegs; i++) {
10996 // Load one integer register's worth from the stack slot.
10997 SDValue Load = DAG.getLoad(
10998 RegVT, dl, Store, StackPtr,
10999 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
11000 // Store it to the final location. Remember the store.
11001 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
11002 ST->getPointerInfo().getWithOffset(Offset),
11003 ST->getBaseAlign(),
11004 ST->getMemOperand()->getFlags()));
11005 // Increment the pointers.
11006 Offset += RegBytes;
11007 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11008 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11009 }
11010
11011 // The last store may be partial. Do a truncating store. On big-endian
11012 // machines this requires an extending load from the stack slot to ensure
11013 // that the bits are in the right place.
11014 EVT LoadMemVT =
11015 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
11016
11017 // Load from the stack slot.
11018 SDValue Load = DAG.getExtLoad(
11019 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
11020 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
11021
11022 Stores.push_back(DAG.getTruncStore(
11023 Load.getValue(1), dl, Load, Ptr,
11024 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
11025 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
11026 // The order of the stores doesn't matter - say it with a TokenFactor.
11027 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11028 return Result;
11029 }
11030
11031 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
11032 "Unaligned store of unknown type.");
11033 // Get the half-size VT
11034 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
11035 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
11036 unsigned IncrementSize = NumBits / 8;
11037
11038 // Divide the stored value in two parts.
11039 SDValue ShiftAmount =
11040 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
11041 SDValue Lo = Val;
11042 // If Val is a constant, replace the upper bits with 0. The SRL will constant
11043 // fold and not use the upper bits. A smaller constant may be easier to
11044 // materialize.
11045 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
11046 Lo = DAG.getNode(
11047 ISD::AND, dl, VT, Lo,
11048 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
11049 VT));
11050 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
11051
11052 // Store the two parts
11053 SDValue Store1, Store2;
11054 Store1 = DAG.getTruncStore(Chain, dl,
11055 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
11056 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
11057 ST->getMemOperand()->getFlags());
11058
11059 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11060 Store2 = DAG.getTruncStore(
11061 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
11062 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
11063 ST->getMemOperand()->getFlags(), ST->getAAInfo());
11064
11065 SDValue Result =
11066 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
11067 return Result;
11068}
11069
11070SDValue
11072 const SDLoc &DL, EVT DataVT,
11073 SelectionDAG &DAG,
11074 bool IsCompressedMemory) const {
11076 EVT AddrVT = Addr.getValueType();
11077 EVT MaskVT = Mask.getValueType();
11078 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
11079 "Incompatible types of Data and Mask");
11080 if (IsCompressedMemory) {
11081 // Incrementing the pointer according to number of '1's in the mask.
11082 if (DataVT.isScalableVector()) {
11083 EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
11084 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
11085 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
11086 } else {
11087 EVT MaskIntVT =
11088 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
11089 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
11090 if (MaskIntVT.getSizeInBits() < 32) {
11091 MaskInIntReg =
11092 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
11093 MaskIntVT = MVT::i32;
11094 }
11095 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
11096 }
11097 // Scale is an element size in bytes.
11098 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
11099 AddrVT);
11100 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
11101 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
11102 } else
11103 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
11104
11105 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
11106}
11107
11109 EVT VecVT, const SDLoc &dl,
11110 ElementCount SubEC) {
11111 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
11112 "Cannot index a scalable vector within a fixed-width vector");
11113
11114 unsigned NElts = VecVT.getVectorMinNumElements();
11115 unsigned NumSubElts = SubEC.getKnownMinValue();
11116 EVT IdxVT = Idx.getValueType();
11117
11118 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
11119 // If this is a constant index and we know the value plus the number of the
11120 // elements in the subvector minus one is less than the minimum number of
11121 // elements then it's safe to return Idx.
11122 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
11123 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
11124 return Idx;
11125 SDValue VS =
11126 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
11127 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
11128 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
11129 DAG.getConstant(NumSubElts, dl, IdxVT));
11130 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
11131 }
11132 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
11133 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
11134 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
11135 DAG.getConstant(Imm, dl, IdxVT));
11136 }
11137 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
11138 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
11139 DAG.getConstant(MaxIndex, dl, IdxVT));
11140}
11141
11142SDValue
11144 EVT VecVT, SDValue Index,
11145 const SDNodeFlags PtrArithFlags) const {
11147 DAG, VecPtr, VecVT,
11149 Index, PtrArithFlags);
11150}
11151
11152SDValue
11154 EVT VecVT, EVT SubVecVT, SDValue Index,
11155 const SDNodeFlags PtrArithFlags) const {
11156 SDLoc dl(Index);
11157 // Make sure the index type is big enough to compute in.
11158 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
11159
11160 EVT EltVT = VecVT.getVectorElementType();
11161
11162 // Calculate the element offset and add it to the pointer.
11163 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
11164 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
11165 "Converting bits to bytes lost precision");
11166 assert(SubVecVT.getVectorElementType() == EltVT &&
11167 "Sub-vector must be a vector with matching element type");
11168 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
11169 SubVecVT.getVectorElementCount());
11170
11171 EVT IdxVT = Index.getValueType();
11172 if (SubVecVT.isScalableVector())
11173 Index =
11174 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11175 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
11176
11177 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11178 DAG.getConstant(EltSize, dl, IdxVT));
11179 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
11180}
11181
11182//===----------------------------------------------------------------------===//
11183// Implementation of Emulated TLS Model
11184//===----------------------------------------------------------------------===//
11185
11187 SelectionDAG &DAG) const {
11188 // Access to address of TLS varialbe xyz is lowered to a function call:
11189 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
11190 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11191 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
11192 SDLoc dl(GA);
11193
11194 ArgListTy Args;
11195 const GlobalValue *GV =
11197 SmallString<32> NameString("__emutls_v.");
11198 NameString += GV->getName();
11199 StringRef EmuTlsVarName(NameString);
11200 const GlobalVariable *EmuTlsVar =
11201 GV->getParent()->getNamedGlobal(EmuTlsVarName);
11202 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
11203 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
11204
11205 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
11206
11208 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
11209 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
11210 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
11211
11212 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
11213 // At last for X86 targets, maybe good for other targets too?
11215 MFI.setAdjustsStack(true); // Is this only for X86 target?
11216 MFI.setHasCalls(true);
11217
11218 assert((GA->getOffset() == 0) &&
11219 "Emulated TLS must have zero offset in GlobalAddressSDNode");
11220 return CallResult.first;
11221}
11222
11224 SelectionDAG &DAG) const {
11225 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
11226 if (!isCtlzFast())
11227 return SDValue();
11228 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11229 SDLoc dl(Op);
11230 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
11231 EVT VT = Op.getOperand(0).getValueType();
11232 SDValue Zext = Op.getOperand(0);
11233 if (VT.bitsLT(MVT::i32)) {
11234 VT = MVT::i32;
11235 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
11236 }
11237 unsigned Log2b = Log2_32(VT.getSizeInBits());
11238 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
11239 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
11240 DAG.getConstant(Log2b, dl, MVT::i32));
11241 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
11242 }
11243 return SDValue();
11244}
11245
// NOTE(review): doxygen/HTML extraction of TargetLowering::expandIntMINMAX.
// The leading "NNNNN" tokens are the original file's line numbers. The
// signature line (11246) and the continuation lines of several `if`
// conditions (11262, 11272, 11280, 11287) were dropped by the extraction,
// so this text does not compile as-is -- restore them from the original
// source before editing.
// Purpose: expand an integer SMIN/SMAX/UMIN/UMAX node into operations the
// target supports, trying cheaper algebraic forms before a generic
// setcc+select expansion.
11247 SDValue Op0 = Node->getOperand(0);
11248 SDValue Op1 = Node->getOperand(1);
11249 EVT VT = Op0.getValueType();
11250 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11251 unsigned Opcode = Node->getOpcode();
11252 SDLoc DL(Node);
11253
11254 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
11255 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
11256 if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
11257 DAG.SignBitIsZero(Op1))
11258 return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
11259
11260 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
// Op0 is frozen because it is used twice below (in SUB and in SETCC).
// NOTE(review): the rest of this condition (line 11262) is missing here.
11261 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
11263 Op0 = DAG.getFreeze(Op0);
11264 SDValue Zero = DAG.getConstant(0, DL, VT);
11265 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11266 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
11267 }
11268
11269 // umin(x,y) -> sub(x,usubsat(x,y))
11270 // TODO: Missing freeze(Op0)?
// NOTE(review): condition continuation (line 11272) missing here.
11271 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
11273 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11274 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
11275 }
11276
11277 // umax(x,y) -> add(x,usubsat(y,x))
11278 // TODO: Missing freeze(Op0)?
// NOTE(review): condition continuation (line 11280) missing here.
11279 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
11281 return DAG.getNode(ISD::ADD, DL, VT, Op0,
11282 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
11283 }
11284
11285 // FIXME: Should really try to split the vector in case it's legal on a
11286 // subvector.
// NOTE(review): the guarding vector-legality condition (line 11287) is
// missing; only its body (scalarize via UnrollVectorOp) survived.
11288 return DAG.UnrollVectorOp(Node);
11289
11290 // Attempt to find an existing SETCC node that we can reuse.
11291 // TODO: Do we need a generic doesSETCCNodeExist?
11292 // TODO: Missing freeze(Op0)/freeze(Op1)?
// buildMinMax prefers a condition code whose SETCC node already exists in
// the DAG (so CSE makes the compare free); the second loop tries the
// commuted predicates and swaps the select arms to compensate. If nothing
// is found it falls back to the preferred predicate PrefCC.
11293 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
11294 ISD::CondCode PrefCommuteCC,
11295 ISD::CondCode AltCommuteCC) {
11296 SDVTList BoolVTList = DAG.getVTList(BoolVT);
11297 for (ISD::CondCode CC : {PrefCC, AltCC}) {
11298 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11299 {Op0, Op1, DAG.getCondCode(CC)})) {
11300 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11301 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11302 }
11303 }
11304 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
11305 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11306 {Op0, Op1, DAG.getCondCode(CC)})) {
11307 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11308 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
11309 }
11310 }
11311 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
11312 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11313 };
11314
11315 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
11316 // -> Y = (A < B) ? B : A
11317 // -> Y = (A >= B) ? A : B
11318 // -> Y = (A <= B) ? B : A
11319 switch (Opcode) {
11320 case ISD::SMAX:
11321 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11322 case ISD::SMIN:
11323 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11324 case ISD::UMAX:
11325 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11326 case ISD::UMIN:
11327 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11328 }
11329
// Every min/max opcode returns above; any other opcode is a caller bug.
11330 llvm_unreachable("How did we get here?");
11331}
11332
// NOTE(review): doxygen/HTML extraction of TargetLowering::expandAddSubSat.
// The signature line (11333) and several other lines (11389, 11401, 11411,
// 11422-11423, 11453) were dropped by the extraction; restore them from
// the original source before editing.
// Purpose: expand [SU]ADDSAT/[SU]SUBSAT into target-legal operations,
// trying min/max-based identities first, then falling back to the
// corresponding overflow op ([SU]ADDO/[SU]SUBO) plus a saturating select.
11334 unsigned Opcode = Node->getOpcode();
11335 SDValue LHS = Node->getOperand(0);
11336 SDValue RHS = Node->getOperand(1);
11337 EVT VT = LHS.getValueType();
11338 SDLoc dl(Node);
11339
11340 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11341 assert(VT.isInteger() && "Expected operands to be integers");
11342
11343 // usub.sat(a, b) -> umax(a, b) - b
11344 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
11345 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
11346 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
11347 }
11348
11349 // usub.sat(a, 1) -> sub(a, zext(a != 0))
// LHS is frozen because it feeds both the SETCC and the final SUB.
11350 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS)) {
11351 LHS = DAG.getFreeze(LHS);
11352 SDValue Zero = DAG.getConstant(0, dl, VT);
11353 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11354 SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
11355 SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
// Mask to the low bit so a sign-extended "true" (-1) still subtracts 1.
11356 Subtrahend =
11357 DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
11358 return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
11359 }
11360
11361 // uadd.sat(a, b) -> umin(a, ~b) + b
11362 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
11363 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
11364 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
11365 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
11366 }
11367
// Map the saturating opcode to its overflow-reporting counterpart.
11368 unsigned OverflowOp;
11369 switch (Opcode) {
11370 case ISD::SADDSAT:
11371 OverflowOp = ISD::SADDO;
11372 break;
11373 case ISD::UADDSAT:
11374 OverflowOp = ISD::UADDO;
11375 break;
11376 case ISD::SSUBSAT:
11377 OverflowOp = ISD::SSUBO;
11378 break;
11379 case ISD::USUBSAT:
11380 OverflowOp = ISD::USUBO;
11381 break;
11382 default:
11383 llvm_unreachable("Expected method to receive signed or unsigned saturation "
11384 "addition or subtraction node.");
11385 }
11386
11387 // FIXME: Should really try to split the vector in case it's legal on a
11388 // subvector.
// NOTE(review): the guarding vector-legality condition (line 11389) is
// missing; only its body survived.
11390 return DAG.UnrollVectorOp(Node);
11391
11392 unsigned BitWidth = LHS.getScalarValueSizeInBits();
11393 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// The overflow op produces {sum/difference, overflow flag} as two results.
11394 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11395 SDValue SumDiff = Result.getValue(0);
11396 SDValue Overflow = Result.getValue(1);
11397 SDValue Zero = DAG.getConstant(0, dl, VT);
11398 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
11399
11400 if (Opcode == ISD::UADDSAT) {
// NOTE(review): a guarding condition (line 11401) is missing before this
// branch -- presumably a boolean-contents check; confirm in the original.
11402 // (LHS + RHS) | OverflowMask
11403 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11404 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
11405 }
11406 // Overflow ? 0xffff.... : (LHS + RHS)
11407 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
11408 }
11409
11410 if (Opcode == ISD::USUBSAT) {
// NOTE(review): a guarding condition (line 11411) is missing here as well.
11412 // (LHS - RHS) & ~OverflowMask
11413 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11414 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
11415 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
11416 }
11417 // Overflow ? 0 : (LHS - RHS)
11418 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
11419 }
11420
11421 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
// NOTE(review): lines 11422-11423 are missing here; they presumably
// define the MinVal/MaxVal APInt bounds used below -- confirm.
11424
11425 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
11426 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
11427
11428 // If either of the operand signs are known, then they are guaranteed to
11429 // only saturate in one direction. If non-negative they will saturate
11430 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11431 //
11432 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11433 // sign of 'y' has to be flipped.
11434
11435 bool LHSIsNonNegative = KnownLHS.isNonNegative();
11436 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
11437 : KnownRHS.isNegative();
11438 if (LHSIsNonNegative || RHSIsNonNegative) {
11439 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11440 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
11441 }
11442
11443 bool LHSIsNegative = KnownLHS.isNegative();
11444 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
11445 : KnownRHS.isNonNegative();
11446 if (LHSIsNegative || RHSIsNegative) {
11447 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11448 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
11449 }
11450 }
11451
11452 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
// NOTE(review): line 11453 is missing here (presumably the MinVal
// definition for this generic path); confirm against the original.
11454 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
// Arithmetic shift of the sign bit picks SatMin or SatMax via the XOR.
11455 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
11456 DAG.getConstant(BitWidth - 1, dl, VT));
11457 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
11458 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
11459}
11460
// NOTE(review): doxygen/HTML extraction of TargetLowering::expandCMP.
// The signature line (11461) and condition lines 11481, 11483 and 11491
// were dropped by the extraction; restore from the original source.
// Purpose: expand ISD::SCMP/UCMP (three-way compare producing -1/0/+1)
// using two setcc results, either via two selects or via a subtraction
// of the extended boolean values.
11462 unsigned Opcode = Node->getOpcode();
11463 SDValue LHS = Node->getOperand(0);
11464 SDValue RHS = Node->getOperand(1);
11465 EVT VT = LHS.getValueType();
11466 EVT ResVT = Node->getValueType(0);
11467 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11468 SDLoc dl(Node);
11469
// UCMP uses unsigned predicates, SCMP uses signed ones.
11470 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11471 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
11472 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
11473 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
11474
11475 // We can't perform arithmetic on i1 values. Extending them would
11476 // probably result in worse codegen, so let's just use two selects instead.
11477 // Some targets are also just better off using selects rather than subtraction
11478 // because one of the conditions can be merged with one of the selects.
11479 // And finally, if we don't know the contents of high bits of a boolean value
11480 // we can't perform any arithmetic either.
// NOTE(review): the opening of this condition (line 11481) and its tail
// (line 11483) are missing from the extraction.
11482 BoolVT.getScalarSizeInBits() == 1 ||
11484 SDValue SelectZeroOrOne =
11485 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
11486 DAG.getConstant(0, dl, ResVT));
11487 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
11488 SelectZeroOrOne);
11489 }
11490
// NOTE(review): the condition guarding this swap (line 11491) is missing;
// presumably it depends on the target's boolean contents -- confirm.
11492 std::swap(IsGT, IsLT);
11493 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
11494 ResVT);
11495}
11496
// NOTE(review): doxygen/HTML extraction of TargetLowering::expandShlSat.
// The signature line (11497) and the vector-legality condition before the
// UnrollVectorOp call (line 11510) were dropped; restore before editing.
// Purpose: expand [SU]SHLSAT (saturating shift-left) by shifting, shifting
// back, and selecting a saturation constant if the round-trip lost bits.
11498 unsigned Opcode = Node->getOpcode();
11499 bool IsSigned = Opcode == ISD::SSHLSAT;
11500 SDValue LHS = Node->getOperand(0);
11501 SDValue RHS = Node->getOperand(1);
11502 EVT VT = LHS.getValueType();
11503 SDLoc dl(Node);
11504
11505 assert((Node->getOpcode() == ISD::SSHLSAT ||
11506 Node->getOpcode() == ISD::USHLSAT) &&
11507 "Expected a SHLSAT opcode");
11508 assert(VT.isInteger() && "Expected operands to be integers");
11509
// NOTE(review): guarding condition (line 11510) missing; only the
// scalarization fallback body survived.
11511 return DAG.UnrollVectorOp(Node);
11512
11513 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
11514
11515 unsigned BW = VT.getScalarSizeInBits();
11516 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11517 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
// Shift back with SRA for signed (preserves sign) or SRL for unsigned.
11518 SDValue Orig =
11519 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
11520
11521 SDValue SatVal;
11522 if (IsSigned) {
// Signed overflow saturates toward the sign of LHS: negative inputs
// clamp to SatMin, non-negative to SatMax.
11523 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
11524 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
11525 SDValue Cond =
11526 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
11527 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
11528 } else {
11529 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
11530 }
11531 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
11532 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
11533}
11534
// NOTE(review): doxygen/HTML extraction of TargetLowering::forceExpandMultiply;
// the first line of the signature (11535) was dropped by the extraction --
// restore it from the original source.
// Purpose: compute the double-width product of LHS*RHS (plus optional high
// halves HiLHS/HiRHS) into Lo/Hi using a schoolbook half-word decomposition,
// with no reliance on target multiply-high support.
11536 bool Signed, SDValue &Lo, SDValue &Hi,
11537 SDValue LHS, SDValue RHS,
11538 SDValue HiLHS, SDValue HiRHS) const {
11539 EVT VT = LHS.getValueType();
11540 assert(RHS.getValueType() == VT && "Mismatching operand types");
11541
// HiLHS/HiRHS must be provided together, and only for the unsigned form.
11542 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
11543 assert((!Signed || !HiLHS) &&
11544 "Signed flag should only be set when HiLHS and RiHS are null");
11545
11546 // We'll expand the multiplication by brute force because we have no other
11547 // options. This is a trivially-generalized version of the code from
11548 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
11549 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
11550 // sign bits while calculating the Hi half.
11551 unsigned Bits = VT.getSizeInBits();
11552 unsigned HalfBits = Bits / 2;
// Split each operand into low (masked) and high (shifted) halves.
11553 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
11554 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
11555 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
11556
11557 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
11558 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
11559
11560 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
11561 // This is always an unsigned shift.
11562 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
11563
// SRA for the signed case propagates the operands' sign bits into the
// high halves, which is what makes the Hi computation signed-correct.
11564 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
11565 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
11566 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
11567
// Accumulate the cross products (Knuth's Algorithm M carry chain).
11568 SDValue U =
11569 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
11570 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
11571 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
11572
11573 SDValue V =
11574 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
11575 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
11576
11577 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
11578 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
11579
11580 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
11581 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
11582
11583 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
11584 // the products to Hi.
11585 if (HiLHS) {
11586 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
11587 DAG.getNode(ISD::ADD, dl, VT,
11588 DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS),
11589 DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS)));
11590 }
11591}
11592
// NOTE(review): doxygen/HTML extraction of TargetLowering::forceExpandWideMUL.
// The first signature line (11593) and lines 11633 (the MakeLibCallOptions
// declaration), 11636 (the endianness condition) and 11647 (the assert
// opening) were dropped by the extraction; restore before editing.
// Purpose: produce the double-width product of LHS*RHS as Lo/Hi, preferring
// a MUL_IXX libcall of twice the width and falling back to the inline
// schoolbook expansion (forceExpandMultiply) when no libcall exists.
11594 bool Signed, const SDValue LHS,
11595 const SDValue RHS, SDValue &Lo,
11596 SDValue &Hi) const {
11597 EVT VT = LHS.getValueType();
11598 assert(RHS.getValueType() == VT && "Mismatching operand types");
11599 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
11600 // We can fall back to a libcall with an illegal type for the MUL if we
11601 // have a libcall big enough.
11602 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
11603 if (WideVT == MVT::i16)
11604 LC = RTLIB::MUL_I16;
11605 else if (WideVT == MVT::i32)
11606 LC = RTLIB::MUL_I32;
11607 else if (WideVT == MVT::i64)
11608 LC = RTLIB::MUL_I64;
11609 else if (WideVT == MVT::i128)
11610 LC = RTLIB::MUL_I128;
11611
11612 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
11613 if (LibcallImpl == RTLIB::Unsupported) {
// No suitable libcall: expand inline instead.
11614 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
11615 return;
11616 }
11617
11618 SDValue HiLHS, HiRHS;
11619 if (Signed) {
11620 // The high part is obtained by SRA'ing all but one of the bits of low
11621 // part.
11622 unsigned LoSize = VT.getFixedSizeInBits();
11623 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
11624 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
11625 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
11626 } else {
11627 HiLHS = DAG.getConstant(0, dl, VT);
11628 HiRHS = DAG.getConstant(0, dl, VT);
11629 }
11630
11631 // Attempt a libcall.
11632 SDValue Ret;
// NOTE(review): line 11633 (presumably the CallOptions declaration) is
// missing here.
11634 CallOptions.setIsSigned(Signed);
11635 CallOptions.setIsPostTypeLegalization(true);
// NOTE(review): the endianness condition opening this branch (line 11636)
// is missing; the two arms pass the halves in opposite register order.
11637 // Halves of WideVT are packed into registers in different order
11638 // depending on platform endianness. This is usually handled by
11639 // the C calling convention, but we can't defer to it in
11640 // the legalizer.
11641 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
11642 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11643 } else {
11644 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
11645 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
11646 }
// NOTE(review): the opening of this assert (line 11647) is missing.
11648 "Ret value is a collection of constituent nodes holding result.");
11649 if (DAG.getDataLayout().isLittleEndian()) {
11650 // Same as above.
11651 Lo = Ret.getOperand(0);
11652 Hi = Ret.getOperand(1);
11653 } else {
11654 Lo = Ret.getOperand(1);
11655 Hi = Ret.getOperand(0);
11656 }
11657}
11658
// NOTE(review): doxygen/HTML extraction of
// TargetLowering::expandFixedPointMul. The second signature line (11660),
// a condition line (11682) and a continuation line (11726) were dropped
// by the extraction; restore before editing.
// Purpose: expand [SU]MULFIX[SAT] -- fixed-point multiply with a given
// fractional Scale, optionally saturating -- using wide/split multiplies
// and funnel shifts.
11659SDValue
11661 assert((Node->getOpcode() == ISD::SMULFIX ||
11662 Node->getOpcode() == ISD::UMULFIX ||
11663 Node->getOpcode() == ISD::SMULFIXSAT ||
11664 Node->getOpcode() == ISD::UMULFIXSAT) &&
11665 "Expected a fixed point multiplication opcode");
11666
11667 SDLoc dl(Node);
11668 SDValue LHS = Node->getOperand(0);
11669 SDValue RHS = Node->getOperand(1);
11670 EVT VT = LHS.getValueType();
11671 unsigned Scale = Node->getConstantOperandVal(2);
11672 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
11673 Node->getOpcode() == ISD::UMULFIXSAT);
11674 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
11675 Node->getOpcode() == ISD::SMULFIXSAT);
11676 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11677 unsigned VTSize = VT.getScalarSizeInBits();
11678
// Scale == 0 degenerates to an ordinary (possibly saturating) multiply.
11679 if (!Scale) {
11680 // [us]mul.fix(a, b, 0) -> mul(a, b)
11681 if (!Saturating) {
// NOTE(review): the MUL-legality condition (line 11682) is missing here.
11683 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11684 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
11685 SDValue Result =
11686 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11687 SDValue Product = Result.getValue(0);
11688 SDValue Overflow = Result.getValue(1);
11689 SDValue Zero = DAG.getConstant(0, dl, VT);
11690
11691 APInt MinVal = APInt::getSignedMinValue(VTSize);
11692 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
11693 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11694 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11695 // Xor the inputs, if resulting sign bit is 0 the product will be
11696 // positive, else negative.
11697 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
11698 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
11699 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
11700 return DAG.getSelect(dl, VT, Overflow, Result, Product);
11701 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
11702 SDValue Result =
11703 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11704 SDValue Product = Result.getValue(0);
11705 SDValue Overflow = Result.getValue(1);
11706
11707 APInt MaxVal = APInt::getMaxValue(VTSize);
11708 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11709 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
11710 }
11711 }
11712
11713 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
11714 "Expected scale to be less than the number of bits if signed or at "
11715 "most the number of bits if unsigned.");
11716 assert(LHS.getValueType() == RHS.getValueType() &&
11717 "Expected both operands to be the same type");
11718
11719 // Get the upper and lower bits of the result.
11720 SDValue Lo, Hi;
11721 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
11722 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
11723 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
11724 if (VT.isVector())
// NOTE(review): the continuation of this statement (line 11726, which
// builds the wide vector type) is missing from the extraction.
11725 WideVT =
// Prefer split-result multiply, then multiply-high, then a wide multiply.
11727 if (isOperationLegalOrCustom(LoHiOp, VT)) {
11728 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
11729 Lo = Result.getValue(0);
11730 Hi = Result.getValue(1);
11731 } else if (isOperationLegalOrCustom(HiOp, VT)) {
11732 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11733 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
11734 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
11735 // Try for a multiplication using a wider type.
11736 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11737 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
11738 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
11739 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
11740 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
11741 SDValue Shifted =
11742 DAG.getNode(ISD::SRA, dl, WideVT, Res,
11743 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
11744 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
11745 } else if (VT.isVector()) {
// No strategy available for vectors: signal failure to the caller.
11746 return SDValue();
11747 } else {
11748 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
11749 }
11750
11751 if (Scale == VTSize)
11752 // Result is just the top half since we'd be shifting by the width of the
11753 // operand. Overflow impossible so this works for both UMULFIX and
11754 // UMULFIXSAT.
11755 return Hi;
11756
11757 // The result will need to be shifted right by the scale since both operands
11758 // are scaled. The result is given to us in 2 halves, so we only want part of
11759 // both in the result.
11760 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
11761 DAG.getShiftAmountConstant(Scale, VT, dl));
11762 if (!Saturating)
11763 return Result;
11764
11765 if (!Signed) {
11766 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
11767 // widened multiplication) aren't all zeroes.
11768
11769 // Saturate to max if ((Hi >> Scale) != 0),
11770 // which is the same as if (Hi > ((1 << Scale) - 1))
11771 APInt MaxVal = APInt::getMaxValue(VTSize);
11772 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
11773 dl, VT);
11774 Result = DAG.getSelectCC(dl, Hi, LowMask,
11775 DAG.getConstant(MaxVal, dl, VT), Result,
11776 ISD::SETUGT);
11777
11778 return Result;
11779 }
11780
11781 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
11782 // widened multiplication) aren't all ones or all zeroes.
11783
11784 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
11785 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
11786
11787 if (Scale == 0) {
// Saturating signed multiply with Scale==0 (only reachable when SMULO was
// not legal above): compare Hi to the sign-extension of Lo to detect
// overflow.
11788 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
11789 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
11790 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
11791 // Saturated to SatMin if wide product is negative, and SatMax if wide
11792 // product is positive ...
11793 SDValue Zero = DAG.getConstant(0, dl, VT);
11794 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
11795 ISD::SETLT);
11796 // ... but only if we overflowed.
11797 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
11798 }
11799
11800 // We handled Scale==0 above so all the bits to examine is in Hi.
11801
11802 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
11803 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
11804 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
11805 dl, VT);
11806 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
11807 // Saturate to min if (Hi >> (Scale - 1)) < -1),
11808 // which is the same as if (HI < (-1 << (Scale - 1))
11809 SDValue HighMask =
11810 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
11811 dl, VT);
11812 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
11813 return Result;
11814}
11815
// NOTE(review): doxygen/HTML extraction of
// TargetLowering::expandFixedPointDiv. The second signature line (11817),
// a line in the LHSLead computation (11835) and a condition continuation
// (11873) were dropped by the extraction; restore before editing.
// Purpose: expand [SU]DIVFIX[SAT] -- fixed-point division with fractional
// Scale -- by pre-shifting the operands to absorb the scale, then emitting
// a plain division. Returns a null SDValue when there is not enough
// headroom to do so safely in this type.
11816SDValue
11818 SDValue LHS, SDValue RHS,
11819 unsigned Scale, SelectionDAG &DAG) const {
11820 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
11821 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
11822 "Expected a fixed point division opcode");
11823
11824 EVT VT = LHS.getValueType();
11825 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
11826 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
11827 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11828
11829 // If there is enough room in the type to upscale the LHS or downscale the
11830 // RHS before the division, we can perform it in this type without having to
11831 // resize. For signed operations, the LHS headroom is the number of
11832 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11833 // The headroom for the RHS is the number of trailing zeroes.
// NOTE(review): the unsigned arm of this conditional expression
// (line 11835) is missing from the extraction.
11834 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11836 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11837
11838 // For signed saturating operations, we need to be able to detect true integer
11839 // division overflow; that is, when you have MIN / -EPS. However, this
11840 // is undefined behavior and if we emit divisions that could take such
11841 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11842 // example).
11843 // Avoid this by requiring an extra bit so that we never get this case.
11844 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11845 // signed saturating division, we need to emit a whopping 32-bit division.
11846 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11847 return SDValue();
11848
// Absorb as much of the scale as possible into the LHS, the rest into RHS.
11849 unsigned LHSShift = std::min(LHSLead, Scale);
11850 unsigned RHSShift = Scale - LHSShift;
11851
11852 // At this point, we know that if we shift the LHS up by LHSShift and the
11853 // RHS down by RHSShift, we can emit a regular division with a final scaling
11854 // factor of Scale.
11855
11856 if (LHSShift)
11857 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11858 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11859 if (RHSShift)
11860 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11861 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11862
11863 SDValue Quot;
11864 if (Signed) {
11865 // For signed operations, if the resulting quotient is negative and the
11866 // remainder is nonzero, subtract 1 from the quotient to round towards
11867 // negative infinity.
11868 SDValue Rem;
11869 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11870 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11871 // we couldn't just form a libcall, but the type legalizer doesn't do it.
// NOTE(review): the continuation of this condition (line 11873, presumably
// an SDIVREM-legality check) is missing from the extraction.
11872 if (isTypeLegal(VT) &&
11874 Quot = DAG.getNode(ISD::SDIVREM, dl,
11875 DAG.getVTList(VT, VT),
11876 LHS, RHS);
11877 Rem = Quot.getValue(1);
11878 Quot = Quot.getValue(0);
11879 } else {
11880 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11881 LHS, RHS);
11882 Rem = DAG.getNode(ISD::SREM, dl, VT,
11883 LHS, RHS);
11884 }
11885 SDValue Zero = DAG.getConstant(0, dl, VT);
11886 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11887 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11888 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
// Quotient is negative iff exactly one operand is negative (sign XOR).
11889 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11890 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11891 DAG.getConstant(1, dl, VT));
11892 Quot = DAG.getSelect(dl, VT,
11893 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11894 Sub1, Quot);
11895 } else
11896 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11897 LHS, RHS);
11898
11899 return Quot;
11900}
11901
// NOTE(review): doxygen/HTML extraction of TargetLowering::expandUADDSUBO;
// the first signature line (11902) was dropped -- restore it from the
// original source.
// Purpose: expand UADDO/USUBO into Result plus an Overflow flag, either by
// forwarding to UADDO_CARRY/USUBO_CARRY with a zero carry-in, or via an
// ADD/SUB followed by an unsigned compare.
11903 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11904 SDLoc dl(Node);
11905 SDValue LHS = Node->getOperand(0);
11906 SDValue RHS = Node->getOperand(1);
11907 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11908
11909 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11910 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11911 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
11912 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11913 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11914 { LHS, RHS, CarryIn });
11915 Result = SDValue(NodeCarry.getNode(), 0);
11916 Overflow = SDValue(NodeCarry.getNode(), 1);
11917 return;
11918 }
11919
11920 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11921 LHS.getValueType(), LHS, RHS);
11922
11923 EVT ResultType = Node->getValueType(1);
11924 EVT SetCCType = getSetCCResultType(
11925 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11926 SDValue SetCC;
11927 if (IsAdd && isOneConstant(RHS)) {
11928 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
11929 // the live range of X. We assume comparing with 0 is cheap.
11930 // The general case (X + C) < C is not necessarily beneficial. Although we
11931 // reduce the live range of X, we may introduce the materialization of
11932 // constant C.
11933 SetCC =
11934 DAG.getSetCC(dl, SetCCType, Result,
11935 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11936 } else if (IsAdd && isAllOnesConstant(RHS)) {
11937 // Special case: uaddo X, -1 overflows if X != 0.
11938 SetCC =
11939 DAG.getSetCC(dl, SetCCType, LHS,
11940 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11941 } else {
// Generic rule: unsigned add overflowed iff Result < LHS; unsigned sub
// overflowed (borrowed) iff Result > LHS.
11942 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
11943 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11944 }
11945 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11946}
11947
11949 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11950 SDLoc dl(Node);
11951 SDValue LHS = Node->getOperand(0);
11952 SDValue RHS = Node->getOperand(1);
11953 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11954
11955 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11956 LHS.getValueType(), LHS, RHS);
11957
11958 EVT ResultType = Node->getValueType(1);
11959 EVT OType = getSetCCResultType(
11960 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11961
11962 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11963 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11964 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11965 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11966 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11967 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11968 return;
11969 }
11970
11971 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11972
11973 // For an addition, the result should be less than one of the operands (LHS)
11974 // if and only if the other operand (RHS) is negative, otherwise there will
11975 // be overflow.
11976 // For a subtraction, the result should be less than one of the operands
11977 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11978 // otherwise there will be overflow.
11979 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11980 SDValue ConditionRHS =
11981 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11982
11983 Overflow = DAG.getBoolExtOrTrunc(
11984 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11985 ResultType, ResultType);
11986}
11987
// NOTE(review): doxygen/HTML extraction of TargetLowering::expandMULO.
// The first signature line (11988), a type-building continuation (12017)
// and the initializer rows of the Ops table (12022-12023) were dropped by
// the extraction; restore them from the original source.
// Purpose: expand SMULO/UMULO into Result plus an Overflow flag, trying a
// shift expansion for power-of-two constants, then multiply-high /
// MUL_LOHI / wide multiply, and finally the brute-force libcall/inline
// path. Returns false only when no expansion is possible (illegal vector).
11989 SDValue &Overflow, SelectionDAG &DAG) const {
11990 SDLoc dl(Node);
11991 EVT VT = Node->getValueType(0);
11992 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11993 SDValue LHS = Node->getOperand(0);
11994 SDValue RHS = Node->getOperand(1);
11995 bool isSigned = Node->getOpcode() == ISD::SMULO;
11996
11997 // For power-of-two multiplications we can use a simpler shift expansion.
11998 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11999 const APInt &C = RHSC->getAPIntValue();
12000 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
12001 if (C.isPowerOf2()) {
12002 // smulo(x, signed_min) is same as umulo(x, signed_min).
12003 bool UseArithShift = isSigned && !C.isMinSignedValue();
12004 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
12005 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
// Overflow iff shifting back does not reproduce the original value.
12006 Overflow = DAG.getSetCC(dl, SetCCVT,
12007 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
12008 dl, VT, Result, ShiftAmt),
12009 LHS, ISD::SETNE);
12010 return true;
12011 }
12012 }
12013
12014 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
12015 if (VT.isVector())
// NOTE(review): the continuation building the wide vector type
// (line 12017) is missing from the extraction.
12016 WideVT =
12019 SDValue BottomHalf;
12020 SDValue TopHalf;
// Ops[isSigned] = { multiply-high opcode, MUL_LOHI opcode, extension
// opcode }. NOTE(review): the two initializer rows (lines 12022-12023)
// are missing from the extraction.
12021 static const unsigned Ops[2][3] =
12024 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
12025 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12026 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
12027 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
12028 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
12029 RHS);
12030 TopHalf = BottomHalf.getValue(1);
12031 } else if (isTypeLegal(WideVT)) {
12032 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
12033 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
12034 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
12035 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
12036 SDValue ShiftAmt =
12037 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
12038 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
12039 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
12040 } else {
12041 if (VT.isVector())
12042 return false;
12043
12044 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
12045 }
12046
12047 Result = BottomHalf;
12048 if (isSigned) {
// Signed overflow iff the high half differs from the sign-extension of
// the low half.
12049 SDValue ShiftAmt = DAG.getShiftAmountConstant(
12050 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
12051 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
12052 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
12053 } else {
// Unsigned overflow iff any bit of the high half is set.
12054 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
12055 DAG.getConstant(0, dl, VT), ISD::SETNE);
12056 }
12057
12058 // Truncate the result if SetCC returns a larger type than needed.
12059 EVT RType = Node->getValueType(1);
12060 if (RType.bitsLT(Overflow.getValueType()))
12061 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
12062
12063 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
12064 "Unexpected result type for S/UMULO legalization");
12065 return true;
12066}
12067
// Expands a VECREDUCE_* node into target-independent operations.
// (The signature line, original line 12068, is missing from this extraction;
// presumably TargetLowering::expandVecReduce -- confirm against upstream.)
// Strategy: while the input is a power-of-two vector and the scalar base op is
// legal on the half-width type, reduce by splitting in half and combining the
// halves element-wise; fall back to a fully scalarized left-to-right chain.
12069 SDLoc dl(Node);
// Scalar/binary opcode corresponding to this VECREDUCE_* opcode.
12070 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12071 SDValue Op = Node->getOperand(0);
12072 EVT VT = Op.getValueType();
12073
12074 // Try to use a shuffle reduction for power of two vectors.
12075 if (VT.isPow2VectorType()) {
// NOTE(review): the loop header that the `break` below exits (original line
// 12076) is missing from this extraction.
12077 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
12078 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
12079 break;
12080
// Combine the low and high halves element-wise with the base opcode,
// carrying the node's flags (e.g. fast-math flags) onto the new node.
12081 SDValue Lo, Hi;
12082 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12083 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
12084 VT = HalfVT;
12085
12086 // Stop if splitting is enough to make the reduction legal.
12087 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
12088 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
12089 Node->getFlags());
12090 }
12091 }
12092
// Scalarization below requires a fixed element count.
// NOTE(review): the report_fatal_error( call line (original 12094) is
// missing from this extraction; only its message string survives.
12093 if (VT.isScalableVector())
12095 "Expanding reductions for scalable vectors is undefined.");
12096
12097 EVT EltVT = VT.getVectorElementType();
12098 unsigned NumElts = VT.getVectorNumElements();
12099
// Extract all elements and fold them left-to-right with the base opcode.
// NOTE(review): the declaration of `Ops` (original line 12100, presumably a
// SmallVector<SDValue, 8>) is missing from this extraction.
12101 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
12102
12103 SDValue Res = Ops[0];
12104 for (unsigned i = 1; i < NumElts; i++)
12105 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
12106
12107 // Result type may be wider than element type.
12108 if (EltVT != Node->getValueType(0))
12109 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
12110 return Res;
12111}
12112
// Expands a sequential (strictly ordered) VECREDUCE_SEQ_* node: starting from
// the scalar accumulator operand, fold each vector element in lane order with
// the scalar base opcode. Ordering matters for non-reassociable FP reductions.
// (The signature line, original 12113, is missing from this extraction;
// presumably TargetLowering::expandVecReduceSeq -- confirm upstream.)
12114 SDLoc dl(Node);
12115 SDValue AccOp = Node->getOperand(0);
12116 SDValue VecOp = Node->getOperand(1);
12117 SDNodeFlags Flags = Node->getFlags();
12118
12119 EVT VT = VecOp.getValueType();
12120 EVT EltVT = VT.getVectorElementType();
12121
// Scalarization requires a fixed element count.
// NOTE(review): the report_fatal_error( call line (original 12123) is
// missing from this extraction; only its message string survives.
12122 if (VT.isScalableVector())
12124 "Expanding reductions for scalable vectors is undefined.");
12125
12126 unsigned NumElts = VT.getVectorNumElements();
12127
// NOTE(review): the declaration of `Ops` (original line 12128, presumably a
// SmallVector<SDValue, 8>) is missing from this extraction.
12129 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
12130
12131 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12132
// Fold elements into the accumulator strictly in order, lane 0 first.
12133 SDValue Res = AccOp;
12134 for (unsigned i = 0; i < NumElts; i++)
12135 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12136
12137 return Res;
12138}
12139
// Expands SREM/UREM. Prefers the fused S/UDIVREM node (taking its remainder
// result), falls back to computing X - (X/Y)*Y when only division is legal,
// and returns false (leaving Result unset) when neither is available so the
// caller can emit a libcall instead.
// (The first signature line, original 12140, is missing from this
// extraction; presumably bool TargetLowering::expandREM(SDNode *Node,
// SDValue &Result, ...) -- confirm upstream.)
12141 SelectionDAG &DAG) const {
12142 EVT VT = Node->getValueType(0);
12143 SDLoc dl(Node);
12144 bool isSigned = Node->getOpcode() == ISD::SREM;
12145 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
12146 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
12147 SDValue Dividend = Node->getOperand(0);
12148 SDValue Divisor = Node->getOperand(1);
// Best case: a combined divrem is available; result 1 is the remainder.
12149 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
12150 SDVTList VTs = DAG.getVTList(VT, VT);
12151 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
12152 return true;
12153 }
12154 if (isOperationLegalOrCustom(DivOpc, VT)) {
12155 // X % Y -> X-X/Y*Y
12156 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
12157 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
12158 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
12159 return true;
12160 }
// No legal division primitive: caller must handle (e.g. via libcall).
12161 return false;
12162}
12163
// Expands FP_TO_SINT_SAT / FP_TO_UINT_SAT: a saturating float-to-integer
// conversion that clamps to the SatVT integer range and maps NaN to zero.
// Two strategies: (1) if the integer bounds convert exactly to floats and the
// target has legal fmin/fmax, clamp in the FP domain then convert; (2)
// otherwise convert first and patch out-of-range/NaN results with setcc+select.
// (The first signature line, original 12164, is missing from this extraction;
// presumably SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, ...).)
12165 SelectionDAG &DAG) const {
12166 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
12167 SDLoc dl(SDValue(Node, 0));
12168 SDValue Src = Node->getOperand(0);
12169
12170 // DstVT is the result type, while SatVT is the size to which we saturate
12171 EVT SrcVT = Src.getValueType();
12172 EVT DstVT = Node->getValueType(0);
12173
12174 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
12175 unsigned SatWidth = SatVT.getScalarSizeInBits();
12176 unsigned DstWidth = DstVT.getScalarSizeInBits();
12177 assert(SatWidth <= DstWidth &&
12178 "Expected saturation width smaller than result width");
12179
12180 // Determine minimum and maximum integer values and their corresponding
12181 // floating-point values.
12182 APInt MinInt, MaxInt;
12183 if (IsSigned) {
12184 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
12185 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
12186 } else {
12187 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
12188 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
12189 }
12190
12191 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
12192 // libcall emission cannot handle this. Large result types will fail.
12193 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
12194 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
12195 SrcVT = Src.getValueType();
12196 }
12197
// Convert the integer bounds into the source FP format, rounding toward
// zero so the FP bounds never lie outside the representable integer range.
12198 const fltSemantics &Sem = SrcVT.getFltSemantics();
12199 APFloat MinFloat(Sem);
12200 APFloat MaxFloat(Sem);
12201
12202 APFloat::opStatus MinStatus =
12203 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
12204 APFloat::opStatus MaxStatus =
12205 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
// Exact bounds allow the simpler FP-domain clamp strategy below.
12206 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
12207 !(MaxStatus & APFloat::opStatus::opInexact);
12208
12209 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
12210 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
12211
12212 // If the integer bounds are exactly representable as floats and min/max are
12213 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
12214 // of comparisons and selects.
12215 auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
12216 bool MayPropagateNaN) {
12217 bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
12218 isOperationLegalOrCustom(MaxOpcode, SrcVT);
12219 if (!MinMaxLegal)
12220 return SDValue();
12221
12222 SDValue Clamped = Src;
12223
12224 // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
12225 // then the result is MinFloat.
12226 Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
12227 // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
12228 Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
12229 // Convert clamped value to integer.
12230 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
12231 dl, DstVT, Clamped);
12232
12233 // If !MayPropagateNan and the conversion is unsigned case we're done,
12234 // because we mapped NaN to MinFloat, which will cast to zero.
12235 if (!MayPropagateNaN && !IsSigned)
12236 return FpToInt;
12237
12238 // Otherwise, select 0 if Src is NaN.
12239 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12240 EVT SetCCVT =
12241 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
// SETUO (unordered) is true exactly when Src is NaN.
12242 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12243 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
12244 };
// Try min/max variants in order of decreasing NaN-friendliness.
12245 if (AreExactFloatBounds) {
12246 if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
12247 /*MayPropagateNaN=*/false))
12248 return Res;
12249 // These may propagate NaN for sNaN operands.
12250 if (SDValue Res =
12251 EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
12252 return Res;
12253 // These always propagate NaN.
12254 if (SDValue Res =
12255 EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
12256 return Res;
12257 }
12258
// Fallback strategy: convert first, then fix up out-of-range results.
12259 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
12260 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
12261
12262 // Result of direct conversion. The assumption here is that the operation is
12263 // non-trapping and it's fine to apply it to an out-of-range value if we
12264 // select it away later.
12265 SDValue FpToInt =
12266 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
12267
12268 SDValue Select = FpToInt;
12269
12270 EVT SetCCVT =
12271 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12272
12273 // If Src ULT MinFloat, select MinInt. In particular, this also selects
12274 // MinInt if Src is NaN.
12275 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
12276 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
12277 // If Src OGT MaxFloat, select MaxInt.
12278 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
12279 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
12280
12281 // In the unsigned case we are done, because we mapped NaN to MinInt, which
12282 // is already zero.
12283 if (!IsSigned)
12284 return Select;
12285
12286 // Otherwise, select 0 if Src is NaN.
12287 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT)
12288 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12289 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
12290}
12291
// Narrows Op to ResultVT using round-to-odd so a subsequent narrowing round
// (e.g. f32 -> bf16) cannot suffer double-rounding errors. Implements the
// technique from Boldo & Melquiond, "When double rounding is odd" (2005):
// keep the naturally-rounded narrow value if it was exact, NaN, or already
// odd; otherwise nudge its integer representation toward the odd neighbour.
// (The first signature line, original 12292, is missing from this extraction;
// presumably SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT,
// SDValue Op, ...).)
12293 const SDLoc &dl,
12294 SelectionDAG &DAG) const {
12295 EVT OperandVT = Op.getValueType();
// Same scalar type: nothing to narrow.
12296 if (OperandVT.getScalarType() == ResultVT.getScalarType())
12297 return Op;
12298 EVT ResultIntVT = ResultVT.changeTypeToInteger();
12299 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12300 // can induce double-rounding which may alter the results. We can
12301 // correct for this using a trick explained in: Boldo, Sylvie, and
12302 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12303 // World Congress. 2005.
12304 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
// Re-widen so the rounding error can be observed by comparing with Op.
12305 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
12306
12307 // We can keep the narrow value as-is if narrowing was exact (no
12308 // rounding error), the wide value was NaN (the narrow value is also
12309 // NaN and should be preserved) or if we rounded to the odd value.
12310 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
12311 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
12312 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
// Low bit of the narrow encoding decides "odd" (both for significand parity).
12313 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
12314 EVT ResultIntVTCCVT = getSetCCResultType(
12315 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
12316 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
12317 // The result is already odd so we don't need to do anything.
12318 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
12319
12320 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12321 Op.getValueType());
12322 // We keep results which are exact, odd or NaN.
// SETUEQ is true when equal OR unordered, covering both "exact" and "NaN".
12323 SDValue KeepNarrow =
12324 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
12325 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
12326 // We morally performed a round-down if AbsNarrow is smaller than
12327 // AbsWide.
12328 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
12329 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
12330 SDValue NarrowIsRd =
12331 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
12332 // If the narrow value is odd or exact, pick it.
12333 // Otherwise, narrow is even and corresponds to either the rounded-up
12334 // or rounded-down value. If narrow is the rounded-down value, we want
12335 // the rounded-up value as it will be odd.
// Adding +1/-1 to the integer encoding steps to the adjacent (odd) float.
12336 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
12337 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
12338 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
12339 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
12340}
12341
// Expands FP_ROUND when the destination scalar type is bf16: performs
// round-to-nearest-even via integer arithmetic on an f32 intermediate,
// using round-to-odd for the first narrowing step to avoid double rounding,
// and quieting NaNs so they are not corrupted by the rounding bias.
// Returns SDValue() for non-bf16 destinations (caller handles those).
// (The signature line, original 12342, is missing from this extraction;
// presumably SDValue TargetLowering::expandFP_ROUND(SDNode *Node, ...).)
12343 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12344 SDValue Op = Node->getOperand(0);
12345 EVT VT = Node->getValueType(0);
12346 SDLoc dl(Node);
12347 if (VT.getScalarType() == MVT::bf16) {
// FP_ROUND's second operand == 1 marks the truncation as value-preserving,
// so the cheaper FP_TO_BF16 lowering suffices.
12348 if (Node->getConstantOperandVal(1) == 1) {
12349 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
12350 }
12351 EVT OperandVT = Op.getValueType();
// Detect NaN up front (SETUO), before Op is overwritten below.
12352 SDValue IsNaN = DAG.getSetCC(
12353 dl,
12354 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
12355 Op, Op, ISD::SETUO);
12356
12357 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12358 // can induce double-rounding which may alter the results. We can
12359 // correct for this using a trick explained in: Boldo, Sylvie, and
12360 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12361 // World Congress. 2005.
12362 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
12363 EVT I32 = F32.changeTypeToInteger();
12364 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
12365 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12366
12367 // Conversions should set NaN's quiet bit. This also prevents NaNs from
12368 // turning into infinities.
12369 SDValue NaN =
12370 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
12371
12372 // Factor in the contribution of the low 16 bits.
// Round-to-nearest-even: bias is 0x7fff plus the LSB of the kept portion,
// so exact ties round toward the even bf16 value.
12373 SDValue One = DAG.getConstant(1, dl, I32);
12374 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
12375 DAG.getShiftAmountConstant(16, I32, dl));
12376 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
12377 SDValue RoundingBias =
12378 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
12379 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
12380
12381 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12382 // 0x80000000.
12383 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
12384
12385 // Now that we have rounded, shift the bits into position.
12386 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
12387 DAG.getShiftAmountConstant(16, I32, dl));
12388 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12389 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
12390 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
12391 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
12392 }
// Non-bf16 destinations are not handled here.
12393 return SDValue();
12394}
12395
// Expands VECTOR_SPLICE_LEFT/RIGHT with a runtime offset by spilling both
// input vectors contiguously to a stack slot and reloading at a computed
// byte offset (see the "Expand through memory thusly" sketch below).
// (The first signature line, original 12396, is missing from this
// extraction; presumably SDValue TargetLowering::expandVectorSplice(...).)
12397 SelectionDAG &DAG) const {
12398 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
12399 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12400 "Unexpected opcode!");
12401 assert((Node->getValueType(0).isScalableVector() ||
12402 !isa<ConstantSDNode>(Node->getOperand(2))) &&
12403 "Fixed length vector types with constant offsets expected to use "
12404 "SHUFFLE_VECTOR!");
12405
12406 EVT VT = Node->getValueType(0);
12407 SDValue V1 = Node->getOperand(0);
12408 SDValue V2 = Node->getOperand(1);
12409 SDValue Offset = Node->getOperand(2);
12410 SDLoc DL(Node);
12411
12412 // Expand through memory thusly:
12413 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12414 // Store V1, Ptr
12415 // Store V2, Ptr + sizeof(V1)
12416 // if (VECTOR_SPLICE_LEFT)
12417 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
12418 // else
12419 // Ptr = Ptr + sizeof(V1) - (Offset * size(VT.Elt))
12420 // Res = Load Ptr
12421
12422 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
12423
// NOTE(review): the first line of the MemVT declaration (original 12424,
// presumably EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),)
// is missing from this extraction.
12425 VT.getVectorElementCount() * 2);
12426 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12427 EVT PtrVT = StackPtr.getValueType();
12428 auto &MF = DAG.getMachineFunction();
12429 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12430 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12431
12432 // Store the lo part of CONCAT_VECTORS(V1, V2)
12433 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
12434 // Store the hi part of CONCAT_VECTORS(V1, V2)
12435 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
12436 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
// Chain V2's store after V1's so both precede the final load.
12437 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
12438
12439 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
12440 SDValue EltByteSize =
12441 DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
12442 Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
12443 SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);
12444
// Clamp the byte offset to at most sizeof(V1) to stay inside the slot.
12445 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
12446
12447 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
12448 StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
12449 else
12450 StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
12451
12452 // Load the spliced result
// NOTE(review): the load's MachinePointerInfo argument line (original 12454)
// is missing from this extraction.
12453 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
12455}
12456
// Expands a masked vector compress (VECTOR_COMPRESS-style node) through a
// stack slot: selected elements are stored to consecutive positions, the
// passthru vector (if any) fills the remainder, and the result is reloaded.
// (The first signature line, original 12457, is missing from this
// extraction; presumably SDValue TargetLowering::expandVECTOR_COMPRESS(...).)
12458 SelectionDAG &DAG) const {
12459 SDLoc DL(Node);
12460 SDValue Vec = Node->getOperand(0);
12461 SDValue Mask = Node->getOperand(1);
12462 SDValue Passthru = Node->getOperand(2);
12463
12464 EVT VecVT = Vec.getValueType();
12465 EVT ScalarVT = VecVT.getScalarType();
12466 EVT MaskVT = Mask.getValueType();
12467 EVT MaskScalarVT = MaskVT.getScalarType();
12468
12469 // Needs to be handled by targets that have scalable vector types.
12470 if (VecVT.isScalableVector())
12471 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
12472
12473 SDValue StackPtr = DAG.CreateStackTemporary(
12474 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
12475 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// NOTE(review): the initializer of PtrInfo (original line 12477, presumably
// MachinePointerInfo::getFixedStack(...)) is missing from this extraction.
12476 MachinePointerInfo PtrInfo =
12478
12479 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
12480 SDValue Chain = DAG.getEntryNode();
// Running output index into the stack slot; advanced only for kept lanes.
12481 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
12482
12483 bool HasPassthru = !Passthru.isUndef();
12484
12485 // If we have a passthru vector, store it on the stack, overwrite the matching
12486 // positions and then re-write the last element that was potentially
12487 // overwritten even though mask[i] = false.
12488 if (HasPassthru)
12489 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
12490
12491 SDValue LastWriteVal;
12492 APInt PassthruSplatVal;
12493 bool IsSplatPassthru =
12494 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
12495
12496 if (IsSplatPassthru) {
12497 // As we do not know which position we wrote to last, we cannot simply
12498 // access that index from the passthru vector. So we first check if passthru
12499 // is a splat vector, to use any element ...
12500 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
12501 } else if (HasPassthru) {
12502 // ... if it is not a splat vector, we need to get the passthru value at
12503 // position = popcount(mask) and re-load it from the stack before it is
12504 // overwritten in the loop below.
// NOTE(review): the opcode arguments of the two getNode calls below
// (original lines 12507 and 12510) are missing from this extraction.
12505 EVT PopcountVT = ScalarVT.changeTypeToInteger();
12506 SDValue Popcount = DAG.getNode(
12508 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
12509 Popcount = DAG.getNode(
12511 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
12512 Popcount);
12513 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
12514 SDValue LastElmtPtr =
12515 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
// NOTE(review): the load's MachinePointerInfo argument line (original
// 12518) is missing from this extraction.
12516 LastWriteVal = DAG.getLoad(
12517 ScalarVT, DL, Chain, LastElmtPtr,
12519 Chain = LastWriteVal.getValue(1);
12520 }
12521
12522 unsigned NumElms = VecVT.getVectorNumElements();
12523 for (unsigned I = 0; I < NumElms; I++) {
// Unconditionally store lane I at the current output position; only kept
// lanes advance OutPos, so rejected lanes are overwritten later.
12524 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
12525 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12526 Chain = DAG.getStore(
12527 Chain, DL, ValI, OutPtr,
12529
12530 // Get the mask value and add it to the current output position. This
12531 // either increments by 1 if MaskI is true or adds 0 otherwise.
12532 // Freeze in case we have poison/undef mask entries.
12533 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
12534 MaskI = DAG.getFreeze(MaskI);
12535 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
12536 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
12537 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
12538
12539 if (HasPassthru && I == NumElms - 1) {
12540 SDValue EndOfVector =
12541 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
12542 SDValue AllLanesSelected =
12543 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
// Clamp OutPos so the final fix-up store stays inside the slot.
12544 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
12545 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
12546
12547 // Re-write the last ValI if all lanes were selected. Otherwise,
12548 // overwrite the last write it with the passthru value.
12549 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
12550 LastWriteVal, SDNodeFlags::Unpredictable);
12551 Chain = DAG.getStore(
12552 Chain, DL, LastWriteVal, OutPtr,
12554 }
12555 }
12556
12557 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12558}
12559
// Expands PARTIAL_REDUCE_UMLA/SMLA/FMLA: extend the multiplicand vectors,
// multiply (skipped when the RHS is a splat of one), slice the wide product
// into accumulator-width subvectors, and sum them all into the accumulator.
// (The first signature line, original 12560, is missing from this
// extraction; presumably SDValue TargetLowering::expandPartialReduceMLA(...).)
12561 SelectionDAG &DAG) const {
12562 SDLoc DL(N);
12563 SDValue Acc = N->getOperand(0);
12564 SDValue MulLHS = N->getOperand(1);
12565 SDValue MulRHS = N->getOperand(2);
12566 EVT AccVT = Acc.getValueType();
12567 EVT MulOpVT = MulLHS.getValueType();
12568
// Element type widened to the accumulator's element type, same lane count.
// NOTE(review): the first line of this initializer (original 12570) is
// missing from this extraction.
12569 EVT ExtMulOpVT =
12571 MulOpVT.getVectorElementCount());
12572
// Pick the extension matching the node's signedness/FP variant.
// NOTE(review): the case labels (original lines 12577, 12580, 12583 --
// presumably PARTIAL_REDUCE_UMLA/SMLA/FMLA) are missing from this
// extraction.
12573 unsigned ExtOpcLHS, ExtOpcRHS;
12574 switch (N->getOpcode()) {
12575 default:
12576 llvm_unreachable("Unexpected opcode");
12578 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
12579 break;
12581 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
12582 break;
12584 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
12585 break;
12586 }
12587
12588 if (ExtMulOpVT != MulOpVT) {
12589 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
12590 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
12591 }
// Skip the multiply entirely when RHS is a splat of 1 (pure reduction).
12592 SDValue Input = MulLHS;
12593 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
12594 if (!llvm::isOneOrOneSplatFP(MulRHS))
12595 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12596 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
12597 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
12598 }
12599
// The product has ScaleFactor times as many lanes as the accumulator;
// carve it into accumulator-shaped subvectors.
12600 unsigned Stride = AccVT.getVectorMinNumElements();
12601 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
12602
12603 // Collect all of the subvectors
12604 std::deque<SDValue> Subvectors = {Acc};
12605 for (unsigned I = 0; I < ScaleFactor; I++)
12606 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
12607
12608 unsigned FlatNode =
12609 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
12610
12611 // Flatten the subvector tree
// Pairwise-add queue reduction: pop two, push their sum, until one remains.
12612 while (Subvectors.size() > 1) {
12613 Subvectors.push_back(
12614 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
12615 Subvectors.pop_front();
12616 Subvectors.pop_front();
12617 }
12618
12619 assert(Subvectors.size() == 1 &&
12620 "There should only be one subvector after tree flattening");
12621
12622 return Subvectors[0];
12623}
12624
12625/// Given a store node \p StoreNode, return true if it is safe to fold that node
12626/// into \p FPNode, which expands to a library call with output pointers.
12627///
12628/// Safe means: walking backwards from StoreNode's non-FPNode operands we
12629/// reach neither FPNode itself (which would create a cycle) nor a
12630/// CALLSEQ_START (which would nest call sequences), and FPNode is not a
12631/// predecessor through any deferred CALLSEQ_END either. The walk is bounded
12632/// by MaxSteps to keep compile time in check.
// NOTE(review): the function signature line (original 12627) and the
// declarations of Worklist/Visited/MaxSteps (original 12629, 12631, 12638)
// are missing from this extraction.
12628 SDNode *FPNode) {
12630 SmallVector<const SDNode *, 8> DeferredNodes;
12632
12633 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
12634 for (SDValue Op : StoreNode->ops())
12635 if (Op.getNode() != FPNode)
12636 Worklist.push_back(Op.getNode());
12638
12639 while (!Worklist.empty()) {
12640 const SDNode *Node = Worklist.pop_back_val();
12641 auto [_, Inserted] = Visited.insert(Node);
12642 if (!Inserted)
12643 continue;
12644
// Conservatively fail once the bounded search budget is exhausted.
12645 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
12646 return false;
12647
12648 // Reached the FPNode (would result in a cycle).
12649 // OR Reached CALLSEQ_START (would result in nested call sequences).
12650 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
12651 return false;
12652
12653 if (Node->getOpcode() == ISD::CALLSEQ_END) {
12654 // Defer looking into call sequences (so we can check we're outside one).
12655 // We still need to look through these for the predecessor check.
12656 DeferredNodes.push_back(Node);
12657 continue;
12658 }
12659
12660 for (SDValue Op : Node->ops())
12661 Worklist.push_back(Op.getNode());
12662 }
12663
12664 // True if we're outside a call sequence and don't have the FPNode as a
12665 // predecessor. No cycles or nested call sequences possible.
12666 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
12667 MaxSteps);
12668}
12669
// Expands a multi-result FP node (e.g. sincos-style) into a library call that
// returns its results through output pointers. Where a user of a result is a
// plain store that is safe to fold (checked via
// canFoldStoreIntoLibCallOutputPointers), the store's destination is passed
// as the output pointer directly; otherwise a stack temporary is used and the
// result reloaded. Returns false if no suitable libcall implementation
// exists. (The first signature line, original 12670, and the Results
// parameter line, original 12672, are missing from this extraction.)
12671 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
12673 std::optional<unsigned> CallRetResNo) const {
12674 if (LC == RTLIB::UNKNOWN_LIBCALL)
12675 return false;
12676
12677 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12678 if (LibcallImpl == RTLIB::Unsupported)
12679 return false;
12680
12681 LLVMContext &Ctx = *DAG.getContext();
12682 EVT VT = Node->getValueType(0);
12683 unsigned NumResults = Node->getNumValues();
12684
12685 // Find users of the node that store the results (and share input chains). The
12686 // destination pointers can be used instead of creating stack allocations.
// NOTE(review): the user-filtering condition (original line 12690,
// presumably a normal-store check) is missing from this extraction.
12687 SDValue StoresInChain;
12688 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
12689 for (SDNode *User : Node->users()) {
12691 continue;
12692 auto *ST = cast<StoreSDNode>(User);
12693 SDValue StoreValue = ST->getValue();
12694 unsigned ResNo = StoreValue.getResNo();
12695 // Ensure the store corresponds to an output pointer.
12696 if (CallRetResNo == ResNo)
12697 continue;
12698 // Ensure the store to the default address space and not atomic or volatile.
12699 if (!ST->isSimple() || ST->getAddressSpace() != 0)
12700 continue;
12701 // Ensure all store chains are the same (so they don't alias).
12702 if (StoresInChain && ST->getChain() != StoresInChain)
12703 continue;
12704 // Ensure the store is properly aligned.
12705 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
12706 if (ST->getAlign() <
12707 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
12708 continue;
12709 // Avoid:
12710 // 1. Creating cyclic dependencies.
12711 // 2. Expanding the node to a call within a call sequence.
// NOTE(review): the canFoldStoreIntoLibCallOutputPointers(ST, Node) guard
// (original line 12712) is missing from this extraction.
12713 continue;
12714 ResultStores[ResNo] = ST;
12715 StoresInChain = ST->getChain();
12716 }
12717
12718 ArgListTy Args;
12719
12720 // Pass the arguments.
12721 for (const SDValue &Op : Node->op_values()) {
12722 EVT ArgVT = Op.getValueType();
12723 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
12724 Args.emplace_back(Op, ArgTy);
12725 }
12726
12727 // Pass the output pointers.
// NOTE(review): the declaration of PointerTy (original line 12729) is
// missing from this extraction.
12728 SmallVector<SDValue, 2> ResultPtrs(NumResults);
12730 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
12731 if (ResNo == CallRetResNo)
12732 continue;
12733 EVT ResVT = Node->getValueType(ResNo);
// Reuse the folded store's destination; otherwise spill to a temporary.
12734 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
12735 ResultPtrs[ResNo] = ResultPtr;
12736 Args.emplace_back(ResultPtr, PointerTy);
12737 }
12738
12739 SDLoc DL(Node);
12740
// NOTE(review): the condition opening this block (original line 12741) is
// missing from this extraction.
12742 // Pass the vector mask (if required).
12743 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
12744 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
12745 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
12746 }
12747
// Void return unless one result is returned directly by the call.
12748 Type *RetType = CallRetResNo.has_value()
12749 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
12750 : Type::getVoidTy(Ctx);
12751 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
12752 SDValue Callee =
12753 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
// NOTE(review): the CallLoweringInfo declaration (original line 12754) is
// missing from this extraction.
12755 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
12756 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
12757
12758 auto [Call, CallChain] = LowerCallTo(CLI);
12759
// Reload each output-pointer result after the call; replace folded stores.
12760 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
12761 if (ResNo == CallRetResNo) {
12762 Results.push_back(Call);
12763 continue;
12764 }
12765 MachinePointerInfo PtrInfo;
12766 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
12767 ResultPtr, PtrInfo);
12768 SDValue OutChain = LoadResult.getValue(1);
12769
12770 if (StoreSDNode *ST = ResultStores[ResNo]) {
12771 // Replace store with the library call.
12772 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
12773 PtrInfo = ST->getPointerInfo();
12774 } else {
// NOTE(review): the PtrInfo assignment's first line (original 12775,
// presumably MachinePointerInfo::getFixedStack() is missing here.
12776 DAG.getMachineFunction(),
12777 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
12778 }
12779
12780 Results.push_back(LoadResult);
12781 }
12782
12783 return true;
12784}
12785
12787 SDValue &LHS, SDValue &RHS,
12788 SDValue &CC, SDValue Mask,
12789 SDValue EVL, bool &NeedInvert,
12790 const SDLoc &dl, SDValue &Chain,
12791 bool IsSignaling) const {
12792 MVT OpVT = LHS.getSimpleValueType();
12793 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
12794 NeedInvert = false;
12795 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
12796 bool IsNonVP = !EVL;
12797 switch (getCondCodeAction(CCCode, OpVT)) {
12798 default:
12799 llvm_unreachable("Unknown condition code action!");
12801 // Nothing to do.
12802 break;
12805 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12806 std::swap(LHS, RHS);
12807 CC = DAG.getCondCode(InvCC);
12808 return true;
12809 }
12810 // Swapping operands didn't work. Try inverting the condition.
12811 bool NeedSwap = false;
12812 InvCC = getSetCCInverse(CCCode, OpVT);
12813 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
12814 // If inverting the condition is not enough, try swapping operands
12815 // on top of it.
12816 InvCC = ISD::getSetCCSwappedOperands(InvCC);
12817 NeedSwap = true;
12818 }
12819 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
12820 CC = DAG.getCondCode(InvCC);
12821 NeedInvert = true;
12822 if (NeedSwap)
12823 std::swap(LHS, RHS);
12824 return true;
12825 }
12826
12827 // Special case: expand i1 comparisons using logical operations.
12828 if (OpVT == MVT::i1) {
12829 SDValue Ret;
12830 switch (CCCode) {
12831 default:
12832 llvm_unreachable("Unknown integer setcc!");
12833 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
12834 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
12835 MVT::i1);
12836 break;
12837 case ISD::SETNE: // X != Y --> (X ^ Y)
12838 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
12839 break;
12840 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12841 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12842 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
12843 DAG.getNOT(dl, LHS, MVT::i1));
12844 break;
12845 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12846 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12847 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
12848 DAG.getNOT(dl, RHS, MVT::i1));
12849 break;
12850 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12851 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12852 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
12853 DAG.getNOT(dl, LHS, MVT::i1));
12854 break;
12855 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12856 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12857 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
12858 DAG.getNOT(dl, RHS, MVT::i1));
12859 break;
12860 }
12861
12862 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
12863 RHS = SDValue();
12864 CC = SDValue();
12865 return true;
12866 }
12867
12869 unsigned Opc = 0;
12870 switch (CCCode) {
12871 default:
12872 llvm_unreachable("Don't know how to expand this condition!");
12873 case ISD::SETUO:
12874 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
12875 CC1 = ISD::SETUNE;
12876 CC2 = ISD::SETUNE;
12877 Opc = ISD::OR;
12878 break;
12879 }
12881 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
12882 NeedInvert = true;
12883 [[fallthrough]];
12884 case ISD::SETO:
12886 "If SETO is expanded, SETOEQ must be legal!");
12887 CC1 = ISD::SETOEQ;
12888 CC2 = ISD::SETOEQ;
12889 Opc = ISD::AND;
12890 break;
12891 case ISD::SETONE:
12892 case ISD::SETUEQ:
12893 // If the SETUO or SETO CC isn't legal, we might be able to use
12894 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
12895 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
12896 // the operands.
12897 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12898 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
12899 isCondCodeLegal(ISD::SETOLT, OpVT))) {
12900 CC1 = ISD::SETOGT;
12901 CC2 = ISD::SETOLT;
12902 Opc = ISD::OR;
12903 NeedInvert = ((unsigned)CCCode & 0x8U);
12904 break;
12905 }
12906 [[fallthrough]];
12907 case ISD::SETOEQ:
12908 case ISD::SETOGT:
12909 case ISD::SETOGE:
12910 case ISD::SETOLT:
12911 case ISD::SETOLE:
12912 case ISD::SETUNE:
12913 case ISD::SETUGT:
12914 case ISD::SETUGE:
12915 case ISD::SETULT:
12916 case ISD::SETULE:
12917 // If we are floating point, assign and break, otherwise fall through.
12918 if (!OpVT.isInteger()) {
12919 // We can use the 4th bit to tell if we are the unordered
12920 // or ordered version of the opcode.
12921 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
12922 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
12923 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
12924 break;
12925 }
12926 // Fallthrough if we are unsigned integer.
12927 [[fallthrough]];
12928 case ISD::SETLE:
12929 case ISD::SETGT:
12930 case ISD::SETGE:
12931 case ISD::SETLT:
12932 case ISD::SETNE:
12933 case ISD::SETEQ:
12934 // If all combinations of inverting the condition and swapping operands
12935 // didn't work then we have no means to expand the condition.
12936 llvm_unreachable("Don't know how to expand this condition!");
12937 }
12938
12939 SDValue SetCC1, SetCC2;
12940 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
12941 // If we aren't the ordered or unorder operation,
12942 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
12943 if (IsNonVP) {
12944 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
12945 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
12946 } else {
12947 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
12948 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
12949 }
12950 } else {
12951 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
12952 if (IsNonVP) {
12953 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
12954 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
12955 } else {
12956 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
12957 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
12958 }
12959 }
12960 if (Chain)
12961 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
12962 SetCC2.getValue(1));
12963 if (IsNonVP)
12964 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
12965 else {
12966 // Transform the binary opcode to the VP equivalent.
12967 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
12968 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
12969 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
12970 }
12971 RHS = SDValue();
12972 CC = SDValue();
12973 return true;
12974 }
12975 }
12976 return false;
12977}
12978
// Expand a vector N-ary operation by splitting every operand into two equal
// halves, performing the operation on each half, and concatenating the two
// results back into the original type. Returns SDValue() when the split is
// not possible or not profitable. (NOTE(review): the opening signature line
// is elided in this rendering — presumably
// `SDValue TargetLowering::expandVectorNaryOpBySplitting(SDNode *Node,` —
// confirm against the full source.)
 12980 SelectionDAG &DAG) const {
 12981 EVT VT = Node->getValueType(0);
 12982 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
 12983 // split into two equal parts.
 12984 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
 12985 return SDValue();
 12986
 12987 // Restrict expansion to cases where both parts can be concatenated.
 12988 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
// Both halves must have the same (legal) type, otherwise CONCAT_VECTORS
// below could not rebuild the original VT.
 12989 if (LoVT != HiVT || !isTypeLegal(LoVT))
 12990 return SDValue();
 12991
 12992 SDLoc DL(Node);
 12993 unsigned Opcode = Node->getOpcode();
 12994
 12995 // Don't expand if the result is likely to be unrolled anyway.
 12996 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
 12997 return SDValue();
 12998
// Split every operand of the node into lo/hi halves, preserving operand
// order for the two half-width node constructions below.
 12999 SmallVector<SDValue, 4> LoOps, HiOps;
 13000 for (const SDValue &V : Node->op_values()) {
 13001 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
 13002 LoOps.push_back(Lo);
 13003 HiOps.push_back(Hi);
 13004 }
 13005
// Perform the original operation on each half, then glue the halves back
// together with CONCAT_VECTORS to produce the full-width result.
 13006 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps);
 13007 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps);
 13008 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
 13009}
13010
// Replace (extract_vector_elt (load vec), idx) with a narrow scalar load of
// just the extracted element, when the target allows the narrower access.
// Returns SDValue() if the transformation is not legal/profitable.
// (NOTE(review): the opening signature line is elided in this rendering —
// presumably `SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,`
// — confirm against the full source.)
 13012 const SDLoc &DL,
 13013 EVT InVecVT, SDValue EltNo,
 13014 LoadSDNode *OriginalLoad,
 13015 SelectionDAG &DAG) const {
// Only simple (non-atomic, non-volatile) loads may be narrowed.
 13016 assert(OriginalLoad->isSimple());
 13017
 13018 EVT VecEltVT = InVecVT.getVectorElementType();
 13019
 13020 // If the vector element type is not a multiple of a byte then we are unable
 13021 // to correctly compute an address to load only the extracted element as a
 13022 // scalar.
 13023 if (!VecEltVT.isByteSized())
 13024 return SDValue();
 13025
// A wider result type means the new load would have to extend; record that
// so the shouldReduceLoadWidth query below sees the right extension kind.
 13026 ISD::LoadExtType ExtTy =
 13027 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
 13028 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
 13029 return SDValue();
 13030
// Compute the byte offset / alignment of the element being loaded. A
// constant index gives a precise MachinePointerInfo; a variable index only
// lets us keep the address space.
 13031 std::optional<unsigned> ByteOffset;
 13032 Align Alignment = OriginalLoad->getAlign();
 13034 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
 13035 int Elt = ConstEltNo->getZExtValue();
 13036 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
 13037 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
 13038 Alignment = commonAlignment(Alignment, *ByteOffset);
 13039 } else {
 13040 // Discard the pointer info except the address space because the memory
 13041 // operand can't represent this new access since the offset is variable.
 13042 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
 13043 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
 13044 }
 13045
 13046 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
 13047 return SDValue();
 13048
// Bail out unless the narrow access is both allowed and fast for this
// address space / alignment / flag combination.
 13049 unsigned IsFast = 0;
 13050 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
 13051 OriginalLoad->getAddressSpace(), Alignment,
 13052 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
 13053 !IsFast)
 13054 return SDValue();
 13055
 13056 // The original DAG loaded the entire vector from memory, so arithmetic
 13057 // within it must be inbounds.
 13059 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
 13060
 13061 // We are replacing a vector load with a scalar load. The new load must have
 13062 // identical memory op ordering to the original.
 13063 SDValue Load;
 13064 if (ResultVT.bitsGT(VecEltVT)) {
 13065 // If the result type of vextract is wider than the load, then issue an
 13066 // extending load instead.
 13067 ISD::LoadExtType ExtType =
 13068 isLoadLegal(ResultVT, VecEltVT, Alignment,
 13069 OriginalLoad->getAddressSpace(), ISD::ZEXTLOAD, false)
 13071 : ISD::EXTLOAD;
 13072 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
 13073 NewPtr, MPI, VecEltVT, Alignment,
 13074 OriginalLoad->getMemOperand()->getFlags(),
 13075 OriginalLoad->getAAInfo());
 13076 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
 13077 } else {
 13078 // The result type is narrower or the same width as the vector element
// Load the element at its natural width, then truncate or bitcast to the
// requested result type.
 13079 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
 13080 Alignment, OriginalLoad->getMemOperand()->getFlags(),
 13081 OriginalLoad->getAAInfo());
 13082 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
 13083 if (ResultVT.bitsLT(VecEltVT))
 13084 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
 13085 else
 13086 Load = DAG.getBitcast(ResultVT, Load);
 13087 }
 13088
 13089 return Load;
 13090}
13091
 13092// Set type id for call site info and metadata 'call_target'.
 13093// We are filtering for:
 13094// a) The call-graph-section use case that wants to know about indirect
 13095// calls, or
 13096// b) We want to annotate indirect calls.
// Populates CSInfo from the IR call when CB is an indirect call.
// (NOTE(review): this rendering elides the function's name line and part of
// the if-condition — the full predicate presumably also checks target/module
// options; confirm against the full source.)
 13097 const CallBase *CB, MachineFunction &MF,
 13098 MachineFunction::CallSiteInfo &CSInfo) const {
 13099 if (CB && CB->isIndirectCall() &&
 13103 CSInfo = MachineFunction::CallSiteInfo(*CB);
 13104}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
uint64_t High
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx, EVT VT, unsigned HalveDepth=0, unsigned TotalDepth=0)
Check if CLMUL on VT can eventually reach a type with legal CLMUL through a chain of halving decompos...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for choosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1402
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1213
APInt bitcastToAPInt() const
Definition APFloat.h:1408
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1193
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1153
void changeSign()
Definition APFloat.h:1352
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1164
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1584
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1769
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1421
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1406
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1400
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1527
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1355
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1411
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
void negate()
Negate this APInt in place.
Definition APInt.h:1483
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1613
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1546
unsigned countLeadingZeros() const
Definition APInt.h:1621
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1450
unsigned logBase2() const
Definition APInt.h:1776
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
void setAllBits()
Set every bit to 1.
Definition APInt.h:1334
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1285
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1382
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1432
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1403
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1457
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1671
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1358
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:859
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:215
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MCRegister getLiveInPhysReg(Register VReg) const
getLiveInPhysReg - If VReg is a live-in virtual register, return the corresponding live-in physical r...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:445
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
iterator end() const
Definition ArrayRef.h:343
iterator begin() const
Definition ArrayRef.h:342
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl, SDNodeFlags Flags={})
Constant fold a setcc to true or false.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, bool OrZero=false, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
iterator end() const
Definition StringRef.h:115
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to ...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Opc is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false,.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
SDValue expandCTLS(SDNode *N, SelectionDAG &DAG) const
Expand CTLS (count leading sign bits) nodes.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
unsigned EmitCallSiteInfo
The flag enables call site info production.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:808
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:313
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:328
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:110
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:717
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition APInt.cpp:3020
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
void stable_sort(R &&Range)
Definition STLExtras.h:2116
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1607
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1589
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1777
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ AfterLegalizeTypes
Definition DAGCombine.h:17
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1636
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:471
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:438
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:478
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:461
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:317
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:192
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:271
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
bool isZero() const
Returns true if value is all zero.
Definition KnownBits.h:80
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:258
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:99
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:167
KnownBits byteSwap() const
Definition KnownBits.h:538
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:305
KnownBits reverseBits() const
Definition KnownBits.h:542
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:249
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:178
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:337
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:69
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:327
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:186
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:264
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:61
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition KnownBits.h:173
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:302
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...