LLVM 23.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
27#include "llvm/IR/DataLayout.h"
30#include "llvm/IR/LLVMContext.h"
31#include "llvm/MC/MCAsmInfo.h"
32#include "llvm/MC/MCExpr.h"
38#include <cctype>
39#include <deque>
40using namespace llvm;
41using namespace llvm::SDPatternMatch;
42
43/// NOTE: The TargetMachine owns TLOF.
47
48// Define the virtual destructor out-of-line for build efficiency.
50
/// Return the name for the given target node opcode. This implementation does
/// not inspect \p Opcode and always returns nullptr.
51const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
52 return nullptr;
53}
54
58
59/// Check whether a given call node is in tail position within its function. If
60/// so, it sets Chain to the input chain of the tail call.
///
/// Returns false outright when F has a true "disable-tail-calls" attribute or
/// when any return attribute outside the ignored list below is present;
/// otherwise the answer is whatever isUsedByReturnOnly(Node, Chain) reports.
62 SDValue &Chain) const {
64
65 // First, check if tail calls have been disabled in this function.
66 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
67 return false;
68
69 // Conservatively require the attributes of the call to match those of
70 // the return. Ignore the following attributes because they don't affect the
71 // call sequence.
72 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
73 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
74 Attribute::DereferenceableOrNull, Attribute::NoAlias,
75 Attribute::NonNull, Attribute::NoUndef,
76 Attribute::Range, Attribute::NoFPClass})
77 CallerAttrs.removeAttribute(Attr);
78
// Any return attribute that survived the filtering above blocks the tail call.
79 if (CallerAttrs.hasAttributes())
80 return false;
81
82 // It's not safe to eliminate the sign / zero extension of the return value.
// NOTE(review): CallerAttrs reported no attributes just above, so this check
// looks unreachable as written; presumably kept as a safeguard - confirm.
83 if (CallerAttrs.contains(Attribute::ZExt) ||
84 CallerAttrs.contains(Attribute::SExt))
85 return false;
86
87 // Check if the only use is a function return node.
88 return isUsedByReturnOnly(Node, Chain);
89}
90
/// Check the outgoing argument values that are assigned to registers which
/// \p CallerPreservedMask does not report as clobbered.
///
/// Returns true only if each such value in \p OutVals is a CopyFromReg
/// (optionally wrapped in an AssertZext) of a register whose live-in physical
/// register, according to MRI, is the very register the argument is assigned
/// to. Arguments that are not register locations are ignored.
92 const uint32_t *CallerPreservedMask,
93 const SmallVectorImpl<CCValAssign> &ArgLocs,
94 const SmallVectorImpl<SDValue> &OutVals) const {
95 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
96 const CCValAssign &ArgLoc = ArgLocs[I];
97 if (!ArgLoc.isRegLoc())
98 continue;
99 MCRegister Reg = ArgLoc.getLocReg();
100 // Only look at callee saved registers.
101 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
102 continue;
103 // Check that we pass the value used for the caller.
104 // (We look for a CopyFromReg reading a virtual register that is used
105 // for the function live-in value of register Reg)
106 SDValue Value = OutVals[I];
// Look through a single AssertZext wrapper before matching the copy.
107 if (Value->getOpcode() == ISD::AssertZext)
108 Value = Value.getOperand(0);
109 if (Value->getOpcode() != ISD::CopyFromReg)
110 return false;
// Operand 1 of the CopyFromReg is the register node being read.
111 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
112 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
113 return false;
114 }
115 return true;
116}
117
118/// Set CallLoweringInfo attribute flags based on a call instruction
119/// and called function attributes.
///
/// Every flag is read from the parameter attributes of argument \p ArgIdx of
/// Call. IndirectType stays null unless one of the byval, preallocated,
/// inalloca or sret attributes is present on that argument.
121 unsigned ArgIdx) {
122 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
123 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
124 IsNoExt = Call->paramHasAttr(ArgIdx, Attribute::NoExt);
125 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
126 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
127 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
128 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
129 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
130 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
131 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
132 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
133 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
134 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
// Start from the stack alignment of the parameter; byval arguments may fall
// back to the plain parameter alignment below.
135 Alignment = Call->getParamStackAlign(ArgIdx);
136 IndirectType = nullptr;
138 "multiple ABI attributes?");
139 if (IsByVal) {
140 IndirectType = Call->getParamByValType(ArgIdx);
// Only consult the parameter alignment when no stack alignment was given.
141 if (!Alignment)
142 Alignment = Call->getParamAlign(ArgIdx);
143 }
144 if (IsPreallocated)
145 IndirectType = Call->getParamPreallocatedType(ArgIdx);
146 if (IsInAlloca)
147 IndirectType = Call->getParamInAllocaType(ArgIdx);
148 if (IsSRet)
149 IndirectType = Call->getParamStructRetType(ArgIdx);
150}
151
152/// Generate a libcall taking the given operands as arguments and returning a
153/// result of type RetVT.
///
/// Reports a fatal internal error if \p LibcallImpl is RTLIB::Unsupported. If
/// \p InChain is null, the DAG entry node is used as the input chain. The
/// returned pair is whatever LowerCallTo produces; callers in this file use
/// .first as the call result and .second as the output chain.
154std::pair<SDValue, SDValue>
155TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl,
157 MakeLibCallOptions CallOptions, const SDLoc &dl,
158 SDValue InChain) const {
159 if (LibcallImpl == RTLIB::Unsupported)
160 reportFatalInternalError("unsupported library call operation");
161
162 if (!InChain)
163 InChain = DAG.getEntryNode();
164
166 Args.reserve(Ops.size());
167
// Build one argument entry per operand. A non-null entry in OpsTypeOverrides
// replaces the IR type that would otherwise be derived from the operand's
// value type.
168 ArrayRef<Type *> OpsTypeOverrides = CallOptions.OpsTypeOverrides;
169 for (unsigned i = 0; i < Ops.size(); ++i) {
170 SDValue NewOp = Ops[i];
171 Type *Ty = i < OpsTypeOverrides.size() && OpsTypeOverrides[i]
172 ? OpsTypeOverrides[i]
173 : NewOp.getValueType().getTypeForEVT(*DAG.getContext());
174 TargetLowering::ArgListEntry Entry(NewOp, Ty);
// When softening, record the operand's type from before softening.
175 if (CallOptions.IsSoften)
176 Entry.OrigTy =
177 CallOptions.OpsVTBeforeSoften[i].getTypeForEVT(*DAG.getContext());
178
// Exactly one of sign/zero extension is requested by default.
179 Entry.IsSExt =
180 shouldSignExtendTypeInLibCall(Entry.Ty, CallOptions.IsSigned);
181 Entry.IsZExt = !Entry.IsSExt;
182
// Under softening (plus a further condition on the next line of the original)
// neither extension is requested.
183 if (CallOptions.IsSoften &&
185 Entry.IsSExt = Entry.IsZExt = false;
186 }
187 Args.push_back(Entry);
188 }
189
190 SDValue Callee =
191 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
192
193 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
194 Type *OrigRetTy = RetTy;
// The return value gets the same sign/zero extension treatment as operands.
196 bool signExtend = shouldSignExtendTypeInLibCall(RetTy, CallOptions.IsSigned);
197 bool zeroExtend = !signExtend;
198
199 if (CallOptions.IsSoften) {
200 OrigRetTy = CallOptions.RetVTBeforeSoften.getTypeForEVT(*DAG.getContext());
202 signExtend = zeroExtend = false;
203 }
204
205 CLI.setDebugLoc(dl)
206 .setChain(InChain)
207 .setLibCallee(getLibcallImplCallingConv(LibcallImpl), RetTy, OrigRetTy,
208 Callee, std::move(Args))
209 .setNoReturn(CallOptions.DoesNotReturn)
212 .setSExtResult(signExtend)
213 .setZExtResult(zeroExtend);
214 return LowerCallTo(CLI);
215}
216
/// Choose a sequence of value types whose sizes add up to Op.size() bytes and
/// append them to \p MemOps.
///
/// Starts from getOptimalMemOpType(); if that yields MVT::Other, falls back to
/// the largest integer type that satisfies the alignment constraints and is
/// no larger than the largest legal integer type. Returns false as soon as
/// more than \p Limit operations would be required, true otherwise.
218 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
219 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
220 const AttributeList &FuncAttributes, EVT *LargestVT) const {
221 EVT VT = getOptimalMemOpType(Context, Op, FuncAttributes);
222
223 if (VT == MVT::Other) {
224 // Use the largest integer type whose alignment constraints are satisfied.
225 VT = MVT::LAST_INTEGER_VALUETYPE;
226 if (Op.isFixedDstAlign()) {
// The source alignment only matters when the operation is a memcpy that is
// not flagged by isMemcpyStrSrc().
227 bool LoadsFromSrc = Op.isMemcpy() && !Op.isMemcpyStrSrc();
228 while (VT != MVT::i8) {
229 unsigned VTSize = VT.getSizeInBits() / 8;
230 bool DstOk =
231 Op.getDstAlign() >= VTSize ||
232 allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign());
233 bool SrcOk =
234 !LoadsFromSrc || Op.getSrcAlign() >= VTSize ||
235 allowsMisalignedMemoryAccesses(VT, SrcAS, Op.getSrcAlign());
236 if (DstOk && SrcOk)
237 break;
239 }
240 }
241 assert(VT.isInteger());
242
243 // Find the largest legal integer type.
244 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
245 while (!isTypeLegal(LVT))
246 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
247 assert(LVT.isInteger());
248
249 // If the type we've chosen is larger than the largest legal integer type
250 // then use the largest legal type.
251 if (VT.bitsGT(LVT))
252 VT = LVT;
253 }
254
// Emit one type per iteration until all Size bytes are covered, shrinking VT
// whenever it is wider than what remains.
255 unsigned NumMemOps = 0;
256 uint64_t Size = Op.size();
257 while (Size) {
258 unsigned VTSize = VT.getSizeInBits() / 8;
259 while (VTSize > Size) {
260 // For now, only use non-vector loads / stores for the left-over pieces.
261 EVT NewVT = VT;
262 unsigned NewVTSize;
263
264 bool Found = false;
265 if (VT.isVector() || VT.isFloatingPoint()) {
266 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
269 Found = true;
270 else if (NewVT == MVT::i64 &&
272 isSafeMemOpType(MVT::f64)) {
273 // i64 is usually not legal on 32-bit targets, but f64 may be.
274 NewVT = MVT::f64;
275 Found = true;
276 }
277 }
278
// Otherwise walk down the simple value types until a safe one (or i8) is hit.
279 if (!Found) {
280 do {
281 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
282 if (NewVT == MVT::i8)
283 break;
284 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
285 }
286 NewVTSize = NewVT.getSizeInBits() / 8;
287
288 // If the new VT cannot cover all of the remaining bits, then consider
289 // issuing a (or a pair of) unaligned and overlapping load / store.
290 unsigned Fast;
291 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
293 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
295 Fast)
// Keep the wider VT and treat it as covering exactly the remaining bytes.
296 VTSize = Size;
297 else {
298 VT = NewVT;
299 VTSize = NewVTSize;
300 }
301 }
302
303 if (++NumMemOps > Limit)
304 return false;
305
306 MemOps.push_back(VT);
307 Size -= VTSize;
308 }
309
310 return true;
311}
312
313/// Soften the operands of a comparison. This code is shared among BR_CC,
314/// SELECT_CC, and SETCC handlers.
///
/// Convenience form that forwards to the chain-taking softenSetCCOperands with
/// a local, initially empty Chain; the resulting chain is discarded.
316 SDValue &NewLHS, SDValue &NewRHS,
317 ISD::CondCode &CCCode,
318 const SDLoc &dl, const SDValue OldLHS,
319 const SDValue OldRHS) const {
320 SDValue Chain;
321 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
322 OldRHS, Chain);
323}
324
/// Replace a floating-point comparison of type \p VT (f32, f64, f128 or
/// ppcf128) with one or two soft-float comparison libcalls.
///
/// On return, \p NewLHS, \p NewRHS and \p CCCode describe the integer
/// comparison that yields the original result. When a second libcall is
/// needed (SETUEQ / SETONE), \p NewLHS instead holds the already combined
/// result and \p NewRHS is reset to an empty SDValue. \p Chain is updated to
/// the chain produced by the libcall(s).
326 SDValue &NewLHS, SDValue &NewRHS,
327 ISD::CondCode &CCCode,
328 const SDLoc &dl, const SDValue OldLHS,
329 const SDValue OldRHS,
330 SDValue &Chain,
331 bool IsSignaling) const {
332 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
333 // not supporting it. We can update this code when libgcc provides such
334 // functions.
335
336 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
337 && "Unsupported setcc type!");
338
339 // Expand into one or more soft-fp libcall(s).
// LC1 is always set; LC2 is only set for predicates that need two libcalls.
// ShouldInvertCC records that the libcall predicate must be inverted.
340 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
341 bool ShouldInvertCC = false;
342 switch (CCCode) {
343 case ISD::SETEQ:
344 case ISD::SETOEQ:
345 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
346 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
347 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
348 break;
349 case ISD::SETNE:
350 case ISD::SETUNE:
351 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
352 (VT == MVT::f64) ? RTLIB::UNE_F64 :
353 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
354 break;
355 case ISD::SETGE:
356 case ISD::SETOGE:
357 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
358 (VT == MVT::f64) ? RTLIB::OGE_F64 :
359 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
360 break;
361 case ISD::SETLT:
362 case ISD::SETOLT:
363 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
364 (VT == MVT::f64) ? RTLIB::OLT_F64 :
365 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
366 break;
367 case ISD::SETLE:
368 case ISD::SETOLE:
369 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
370 (VT == MVT::f64) ? RTLIB::OLE_F64 :
371 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
372 break;
373 case ISD::SETGT:
374 case ISD::SETOGT:
375 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
376 (VT == MVT::f64) ? RTLIB::OGT_F64 :
377 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
378 break;
379 case ISD::SETO:
// SETO is handled as the inverse of the unordered (UO) libcall.
380 ShouldInvertCC = true;
381 [[fallthrough]];
382 case ISD::SETUO:
383 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
384 (VT == MVT::f64) ? RTLIB::UO_F64 :
385 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
386 break;
387 case ISD::SETONE:
388 // SETONE = O && UNE
389 ShouldInvertCC = true;
390 [[fallthrough]];
391 case ISD::SETUEQ:
// Two libcalls: unordered (LC1) and ordered-equal (LC2).
392 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
393 (VT == MVT::f64) ? RTLIB::UO_F64 :
394 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
395 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
396 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
397 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
398 break;
399 default:
400 // Invert CC for unordered comparisons
// Each remaining unordered predicate maps to the opposite ordered libcall.
401 ShouldInvertCC = true;
402 switch (CCCode) {
403 case ISD::SETULT:
404 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
405 (VT == MVT::f64) ? RTLIB::OGE_F64 :
406 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
407 break;
408 case ISD::SETULE:
409 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
410 (VT == MVT::f64) ? RTLIB::OGT_F64 :
411 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
412 break;
413 case ISD::SETUGT:
414 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
415 (VT == MVT::f64) ? RTLIB::OLE_F64 :
416 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
417 break;
418 case ISD::SETUGE:
419 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
420 (VT == MVT::f64) ? RTLIB::OLT_F64 :
421 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
422 break;
423 default: llvm_unreachable("Do not know how to soften this setcc!");
424 }
425 }
426
427 // Use the target specific return value for comparison lib calls.
429 SDValue Ops[2] = {NewLHS, NewRHS};
431 EVT OpsVT[2] = { OldLHS.getValueType(),
432 OldRHS.getValueType() };
433 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
// The first libcall result is compared against the constant 0.
434 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
435 NewLHS = Call.first;
436 NewRHS = DAG.getConstant(0, dl, RetVT);
437
438 RTLIB::LibcallImpl LC1Impl = getLibcallImpl(LC1);
439 if (LC1Impl == RTLIB::Unsupported) {
441 "no libcall available to soften floating-point compare");
442 }
443
444 CCCode = getSoftFloatCmpLibcallPredicate(LC1Impl);
445 if (ShouldInvertCC) {
446 assert(RetVT.isInteger());
447 CCCode = getSetCCInverse(CCCode, RetVT);
448 }
449
450 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
451 // Update Chain.
452 Chain = Call.second;
453 } else {
454 RTLIB::LibcallImpl LC2Impl = getLibcallImpl(LC2);
455 if (LC2Impl == RTLIB::Unsupported) {
457 "no libcall available to soften floating-point compare");
458 }
459
460 assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
461 "unordered call should be simple boolean");
462
463 EVT SetCCVT =
464 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
466 NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
467 DAG.getValueType(MVT::i1));
468 }
469
// Compare the first result, issue the second libcall, then combine the two
// setcc results with AND (inverted case) or OR.
470 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
471 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
472 CCCode = getSoftFloatCmpLibcallPredicate(LC2Impl);
473 if (ShouldInvertCC)
474 CCCode = getSetCCInverse(CCCode, RetVT);
475 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
// When a chain is in use, merge the chains of both libcalls.
476 if (Chain)
477 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
478 Call2.second);
479 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
480 Tmp.getValueType(), Tmp, NewLHS);
// An empty NewRHS tells the caller that NewLHS already is the final result.
481 NewRHS = SDValue();
482 }
483}
484
485/// Return the entry encoding for a jump table in the current function. The
486/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
488 // In non-PIC modes, just use the address of a block.
491
492 // Otherwise, use a label difference.
494}
495
/// This implementation hands back Table unchanged; \p DAG is not used here.
497 SelectionDAG &DAG) const {
498 return Table;
499}
500
501/// This returns the relocation base for the given PIC jumptable, the same as
502/// getPICJumpTableRelocBase, but as an MCExpr.
///
/// The expression is a symbol reference to the symbol that
/// MF->getJTISymbol(JTI, Ctx) yields for jump table index \p JTI.
503const MCExpr *
505 unsigned JTI,MCContext &Ctx) const{
506 // The normal PIC reloc base is the label at the start of the jump table.
507 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
508}
509
/// Build the indirect branch for a jump table dispatch: an ISD::BRIND to
/// \p Addr whose chain is Value, optionally routed through a jump-table
/// debug-info node for table index \p JTI.
511 SDValue Addr, int JTI,
512 SelectionDAG &DAG) const {
513 SDValue Chain = Value;
514 // Jump table debug info is only needed if CodeView is enabled.
516 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
517 }
518 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
519}
520
/// Decide whether an offset may be folded into the global address GA.
/// Returns false when the global cannot be assumed DSO-local, and false under
/// the position-independent-code check below; true otherwise.
521bool
523 const TargetMachine &TM = getTargetMachine();
524 const GlobalValue *GV = GA->getGlobal();
525
526 // If the address is not even local to this DSO we will have to load it from
527 // a GOT and then add the offset.
528 if (!TM.shouldAssumeDSOLocal(GV))
529 return false;
530
531 // If the code is position independent we will have to add a base register.
533 return false;
534
535 // Otherwise we can do it.
536 return true;
537}
538
539//===----------------------------------------------------------------------===//
540// Optimization Methods
541//===----------------------------------------------------------------------===//
542
543/// If the specified instruction has a constant integer operand and there are
544/// bits set in that constant that are not demanded, then clear those bits and
545/// return true.
///
/// Only XOR, AND and OR with a non-opaque constant as operand 1 are handled
/// generically; targets get the first chance through
/// targetShrinkDemandedConstant(). The replacement is recorded in \p TLO.
547 const APInt &DemandedBits,
548 const APInt &DemandedElts,
549 TargetLoweringOpt &TLO) const {
550 SDLoc DL(Op);
551 unsigned Opcode = Op.getOpcode();
552
553 // Early-out if we've ended up calling an undemanded node, leave this to
554 // constant folding.
555 if (DemandedBits.isZero() || DemandedElts.isZero())
556 return false;
557
558 // Do target-specific constant optimization.
// The hook's success only counts as a change if it actually set TLO.New.
559 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
560 return TLO.New.getNode();
561
562 // FIXME: ISD::SELECT, ISD::SELECT_CC
563 switch (Opcode) {
564 default:
565 break;
566 case ISD::XOR:
567 case ISD::AND:
568 case ISD::OR: {
569 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
570 if (!Op1C || Op1C->isOpaque())
571 return false;
572
573 // If this is a 'not' op, don't touch it because that's a canonical form.
574 const APInt &C = Op1C->getAPIntValue();
575 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
576 return false;
577
// The constant has bits outside the demanded set: rebuild the node with the
// constant masked down to the demanded bits, keeping the original flags.
578 if (!C.isSubsetOf(DemandedBits)) {
579 EVT VT = Op.getValueType();
580 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
581 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
582 Op->getFlags());
583 return TLO.CombineTo(Op, NewOp);
584 }
585
586 break;
587 }
588 }
589
590 return false;
591}
592
/// Convenience form that derives DemandedElts from the value type of Op and
/// forwards to the DemandedElts-taking ShrinkDemandedConstant. For non-vector
/// types the mask is the single set bit APInt(1, 1).
594 const APInt &DemandedBits,
595 TargetLoweringOpt &TLO) const {
596 EVT VT = Op.getValueType();
597 APInt DemandedElts = VT.isVector()
599 : APInt(1, 1);
600 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
601}
602
603/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
604/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
605/// but it could be generalized for targets with other types of implicit
606/// widening casts.
///
/// Returns false without changing anything for vector types, for nodes with
/// more than one use, or when no suitable narrower type is found. Otherwise
/// the replacement is recorded through TLO.CombineTo().
608 const APInt &DemandedBits,
609 TargetLoweringOpt &TLO) const {
610 assert(Op.getNumOperands() == 2 &&
611 "ShrinkDemandedOp only supports binary operators!");
612 assert(Op.getNode()->getNumValues() == 1 &&
613 "ShrinkDemandedOp only supports nodes with one result!");
614
615 EVT VT = Op.getValueType();
616 SelectionDAG &DAG = TLO.DAG;
617 SDLoc dl(Op);
618
619 // Early return, as this function cannot handle vector types.
620 if (VT.isVector())
621 return false;
622
623 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
624 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
625 "ShrinkDemandedOp only supports operands that have the same size!");
626
627 // Don't do this if the node has another user, which may require the
628 // full value.
629 if (!Op.getNode()->hasOneUse())
630 return false;
631
632 // Search for the smallest integer type with free casts to and from
633 // Op's type. For expedience, just check power-of-2 integer types.
// Candidates start at the demanded width rounded up to a power of two and
// stay strictly below BitWidth.
634 unsigned DemandedSize = DemandedBits.getActiveBits();
635 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
636 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
637 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
638 if (isTruncateFree(Op, SmallVT) && isZExtFree(SmallVT, VT)) {
639 // We found a type with free casts.
640
641 // If the operation has the 'disjoint' flag, then the
642 // operands on the new node are also disjoint.
643 SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint
645 unsigned Opcode = Op.getOpcode();
646 if (Opcode == ISD::PTRADD) {
647 // It isn't a ptradd anymore if it doesn't operate on the entire
648 // pointer.
649 Opcode = ISD::ADD;
650 }
// Narrow both operands, redo the operation in SmallVT, then widen the result
// back to VT with ANY_EXTEND.
651 SDValue X = DAG.getNode(
652 Opcode, dl, SmallVT,
653 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
654 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags);
655 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
656 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
657 return TLO.CombineTo(Op, Z);
658 }
659 }
660 return false;
661}
662
/// DAG-combiner entry point: runs SimplifyDemandedBits on Op with a
/// TargetLoweringOpt configured from the combiner's legalization state and,
/// on success, puts Op's node back on the combiner worklist. Returns whether
/// a simplification happened.
664 DAGCombinerInfo &DCI) const {
665 SelectionDAG &DAG = DCI.DAG;
666 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
667 !DCI.isBeforeLegalizeOps());
668 KnownBits Known;
669
670 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
671 if (Simplified) {
672 DCI.AddToWorklist(Op.getNode());
674 }
675 return Simplified;
676}
677
/// DAG-combiner entry point taking an explicit \p DemandedElts mask: runs
/// SimplifyDemandedBits on Op with a TargetLoweringOpt configured from the
/// combiner's legalization state and, on success, puts Op's node back on the
/// combiner worklist. Returns whether a simplification happened.
679 const APInt &DemandedElts,
680 DAGCombinerInfo &DCI) const {
681 SelectionDAG &DAG = DCI.DAG;
682 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
683 !DCI.isBeforeLegalizeOps());
684 KnownBits Known;
685
686 bool Simplified =
687 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
688 if (Simplified) {
689 DCI.AddToWorklist(Op.getNode());
691 }
692 return Simplified;
693}
694
/// Convenience form that derives DemandedElts from the value type of Op and
/// forwards every other argument unchanged to the DemandedElts-taking
/// SimplifyDemandedBits.
696 KnownBits &Known,
698 unsigned Depth,
699 bool AssumeSingleUse) const {
700 EVT VT = Op.getValueType();
701
702 // Since the number of lanes in a scalable vector is unknown at compile time,
703 // we track one bit which is implicitly broadcast to all lanes. This means
704 // that all lanes in a scalable vector are considered demanded.
705 APInt DemandedElts = VT.isFixedLengthVector()
707 : APInt(1, 1);
708 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
709 AssumeSingleUse);
710}
711
// Look for a value that can stand in for Op when only DemandedBits of the
// DemandedElts lanes are needed. The result is an operand of Op, an UNDEF, a
// bitcast of a recursively simplified source, or an empty SDValue when
// nothing applies.
712// TODO: Under what circumstances can we create nodes? Constant folding?
714 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
715 SelectionDAG &DAG, unsigned Depth) const {
716 EVT VT = Op.getValueType();
717
718 // Limit search depth.
720 return SDValue();
721
722 // Ignore UNDEFs.
723 if (Op.isUndef())
724 return SDValue();
725
726 // Not demanding any bits/elts from Op.
727 if (DemandedBits == 0 || DemandedElts == 0)
728 return DAG.getUNDEF(VT);
729
730 bool IsLE = DAG.getDataLayout().isLittleEndian();
731 unsigned NumElts = DemandedElts.getBitWidth();
732 unsigned BitWidth = DemandedBits.getBitWidth();
733 KnownBits LHSKnown, RHSKnown;
734 switch (Op.getOpcode()) {
735 case ISD::BITCAST: {
736 if (VT.isScalableVector())
737 return SDValue();
738
739 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
740 EVT SrcVT = Src.getValueType();
741 EVT DstVT = Op.getValueType();
742 if (SrcVT == DstVT)
743 return Src;
744
// Same element width on both sides: the demanded masks carry over as-is.
745 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
746 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
747 if (NumSrcEltBits == NumDstEltBits)
749 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
750 return DAG.getBitcast(DstVT, V);
751
// Each destination element is made of Scale narrower source elements.
752 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
753 unsigned Scale = NumDstEltBits / NumSrcEltBits;
754 unsigned NumSrcElts = SrcVT.getVectorNumElements();
755 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
756 for (unsigned i = 0; i != Scale; ++i) {
757 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
758 unsigned BitOffset = EltOffset * NumSrcEltBits;
759 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
760 }
761 // Recursive calls below may turn not demanded elements into poison, so we
762 // need to demand all smaller source elements that map to a demanded
763 // destination element.
764 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
765
767 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
768 return DAG.getBitcast(DstVT, V);
769 }
770
771 // TODO - bigendian once we have test coverage.
// Each source element is split into Scale narrower destination elements.
772 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
773 unsigned Scale = NumSrcEltBits / NumDstEltBits;
774 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
775 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
776 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
777 for (unsigned i = 0; i != NumElts; ++i)
778 if (DemandedElts[i]) {
779 unsigned Offset = (i % Scale) * NumDstEltBits;
780 DemandedSrcBits.insertBits(DemandedBits, Offset);
781 DemandedSrcElts.setBit(i / Scale);
782 }
783
785 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
786 return DAG.getBitcast(DstVT, V);
787 }
788
789 break;
790 }
791 case ISD::AND: {
792 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
793 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
794
795 // If all of the demanded bits are known 1 on one side, return the other.
796 // These bits cannot contribute to the result of the 'and' in this
797 // context.
798 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
799 return Op.getOperand(0);
800 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
801 return Op.getOperand(1);
802 break;
803 }
804 case ISD::OR: {
805 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
806 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
807
808 // If all of the demanded bits are known zero on one side, return the
809 // other. These bits cannot contribute to the result of the 'or' in this
810 // context.
811 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
812 return Op.getOperand(0);
813 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
814 return Op.getOperand(1);
815 break;
816 }
817 case ISD::XOR: {
818 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
819 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
820
821 // If all of the demanded bits are known zero on one side, return the
822 // other.
823 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
824 return Op.getOperand(0);
825 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
826 return Op.getOperand(1);
827 break;
828 }
829 case ISD::ADD:
830 case ISD::MUL:
831 case ISD::SMIN:
832 case ISD::SMAX:
833 case ISD::UMIN:
834 case ISD::UMAX: {
// If one operand is the identity element for this opcode, return the other.
835 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(1),
836 DemandedElts, 1, Depth + 1))
837 return Op.getOperand(0);
838
839 if (DAG.isIdentityElement(Op.getOpcode(), Op->getFlags(), Op.getOperand(0),
840 DemandedElts, 0, Depth + 1))
841 return Op.getOperand(1);
842 break;
843 }
844 case ISD::SHL: {
845 // If we are only demanding sign bits then we can use the shift source
846 // directly.
847 if (std::optional<unsigned> MaxSA =
848 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
849 SDValue Op0 = Op.getOperand(0);
850 unsigned ShAmt = *MaxSA;
851 unsigned NumSignBits =
852 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
853 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
854 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
855 return Op0;
856 }
857 break;
858 }
859 case ISD::SRL: {
860 // If we are only demanding sign bits then we can use the shift source
861 // directly.
862 if (std::optional<unsigned> MaxSA =
863 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
864 SDValue Op0 = Op.getOperand(0);
865 unsigned ShAmt = *MaxSA;
866 // Must already be signbits in DemandedBits bounds, and can't demand any
867 // shifted in zeroes.
868 if (DemandedBits.countl_zero() >= ShAmt) {
869 unsigned NumSignBits =
870 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
871 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
872 return Op0;
873 }
874 }
875 break;
876 }
877 case ISD::SETCC: {
878 SDValue Op0 = Op.getOperand(0);
879 SDValue Op1 = Op.getOperand(1);
880 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
881 // If (1) we only need the sign-bit, (2) the setcc operands are the same
882 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
883 // -1, we may be able to bypass the setcc.
884 if (DemandedBits.isSignMask() &&
888 // If we're testing X < 0, then this compare isn't needed - just use X!
889 // FIXME: We're limiting to integer types here, but this should also work
890 // if we don't care about FP signed-zero. The use of SETLT with FP means
891 // that we don't care about NaNs.
892 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
894 return Op0;
895 }
896 break;
897 }
899 // If none of the extended bits are demanded, eliminate the sextinreg.
900 SDValue Op0 = Op.getOperand(0);
901 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
902 unsigned ExBits = ExVT.getScalarSizeInBits();
903 if (DemandedBits.getActiveBits() <= ExBits &&
905 return Op0;
906 // If the input is already sign extended, just drop the extension.
907 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
908 if (NumSignBits >= (BitWidth - ExBits + 1))
909 return Op0;
910 break;
911 }
915 if (VT.isScalableVector())
916 return SDValue();
917
918 // If we only want the lowest element and none of extended bits, then we can
919 // return the bitcasted source vector.
920 SDValue Src = Op.getOperand(0);
921 EVT SrcVT = Src.getValueType();
922 EVT DstVT = Op.getValueType();
923 if (IsLE && DemandedElts == 1 &&
924 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
925 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
926 return DAG.getBitcast(DstVT, Src);
927 }
928 break;
929 }
931 if (VT.isScalableVector())
932 return SDValue();
933
934 // If we don't demand the inserted element, return the base vector.
935 SDValue Vec = Op.getOperand(0);
936 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
937 EVT VecVT = Vec.getValueType();
// Only a constant, in-range index can be checked against DemandedElts.
938 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
939 !DemandedElts[CIdx->getZExtValue()])
940 return Vec;
941 break;
942 }
944 if (VT.isScalableVector())
945 return SDValue();
946
947 SDValue Vec = Op.getOperand(0);
948 SDValue Sub = Op.getOperand(1);
949 uint64_t Idx = Op.getConstantOperandVal(2);
950 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
951 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
952 // If we don't demand the inserted subvector, return the base vector.
953 if (DemandedSubElts == 0)
954 return Vec;
955 break;
956 }
957 case ISD::VECTOR_SHUFFLE: {
959 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
960
961 // If all the demanded elts are from one operand and are inline,
962 // then we can use the operand directly.
// Negative mask entries and undemanded lanes do not constrain the result.
963 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
964 for (unsigned i = 0; i != NumElts; ++i) {
965 int M = ShuffleMask[i];
966 if (M < 0 || !DemandedElts[i])
967 continue;
968 AllUndef = false;
969 IdentityLHS &= (M == (int)i);
970 IdentityRHS &= ((M - NumElts) == i);
971 }
972
973 if (AllUndef)
974 return DAG.getUNDEF(Op.getValueType());
975 if (IdentityLHS)
976 return Op.getOperand(0);
977 if (IdentityRHS)
978 return Op.getOperand(1);
979 break;
980 }
981 default:
982 // TODO: Probably okay to remove after audit; here to reduce change size
983 // in initial enablement patch for scalable vectors
984 if (VT.isScalableVector())
985 return SDValue();
986
// Opcodes at or above BUILTIN_OP_END are handed to a target-specific hook.
987 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
989 Op, DemandedBits, DemandedElts, DAG, Depth))
990 return V;
991 break;
992 }
993 return SDValue();
994}
995
/// Convenience form that derives DemandedElts from the value type of Op and
/// forwards to the DemandedElts-taking SimplifyMultipleUseDemandedBits.
998 unsigned Depth) const {
999 EVT VT = Op.getValueType();
1000 // Since the number of lanes in a scalable vector is unknown at compile time,
1001 // we track one bit which is implicitly broadcast to all lanes. This means
1002 // that all lanes in a scalable vector are considered demanded.
1003 APInt DemandedElts = VT.isFixedLengthVector()
1005 : APInt(1, 1);
1006 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1007 Depth);
1008}
1009
// Element-only variant: demands every bit of each scalar element (an all-ones
// mask as wide as Op's scalar value size) and forwards to
// SimplifyMultipleUseDemandedBits with the caller's DemandedElts unchanged.
// NOTE(review): the line carrying the return type and function name (listing
// line 1010) is absent from this listing.
1011 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
1012 unsigned Depth) const {
1013 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
1014 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
1015 Depth);
1016}
1017
1018// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
1019// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
//
// Op must be an SRL or SRA node (asserted below). DemandedBits/DemandedElts
// describe which result bits and vector lanes are used, and Depth is passed
// through to the sign-bit / known-bits queries. Returns an empty SDValue when
// the pattern does not match or the averaging node cannot be used; otherwise
// returns the averaging node extended or truncated back to Op's type.
// NOTE(review): the first lines of the signature (listing lines 1020-1021,
// which declare Op and TLO) are absent from this listing.
1022 const TargetLowering &TLI,
1023 const APInt &DemandedBits,
1024 const APInt &DemandedElts, unsigned Depth) {
1025 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
1026 "SRL or SRA node is required here!");
1027 // Is the right shift using an immediate value of 1?
1028 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
1029 if (!N1C || !N1C->isOne())
1030 return SDValue();
1031
1032 // We are looking for an avgfloor
1033 // add(ext, ext)
1034 // or one of these as an avgceil
1035 // add(add(ext, ext), 1)
1036 // add(add(ext, 1), ext)
1037 // add(ext, add(ext, 1))
1038 SDValue Add = Op.getOperand(0);
1039 if (Add.getOpcode() != ISD::ADD)
1040 return SDValue();
1041
1042 SDValue ExtOpA = Add.getOperand(0);
1043 SDValue ExtOpB = Add.getOperand(1);
 // Add2 stays null for the avgfloor form; for avgceil it records the inner
 // add so its overflow behaviour can be checked further down.
1044 SDValue Add2;
 // Op1/Op2 are the operands of the inner add A, and Op3 is the other operand
 // of the outer add. Succeeds when Op2 or Op3 is a constant (or splat) one;
 // the two remaining values become ExtOpA/ExtOpB. Op1 is never tested for
 // being the constant.
1045 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
1046 ConstantSDNode *ConstOp;
1047 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
1048 ConstOp->isOne()) {
1049 ExtOpA = Op1;
1050 ExtOpB = Op3;
1051 Add2 = A;
1052 return true;
1053 }
1054 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
1055 ConstOp->isOne()) {
1056 ExtOpA = Op1;
1057 ExtOpB = Op2;
1058 Add2 = A;
1059 return true;
1060 }
1061 return false;
1062 };
 // Try the inner add on either side of the outer add. A successful match
 // rewrites ExtOpA/ExtOpB as a side effect.
1063 bool IsCeil =
1064 (ExtOpA.getOpcode() == ISD::ADD &&
1065 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
1066 (ExtOpB.getOpcode() == ISD::ADD &&
1067 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
1068
1069 // If the shift is signed (sra):
1070 // - Needs >= 2 sign bits for both operands.
1071 // - Needs >= 2 zero bits.
1072 // If the shift is unsigned (srl):
1073 // - Needs >= 1 zero bit for both operands.
1074 // - Needs 1 demanded bit zero and >= 2 sign bits.
1075 SelectionDAG &DAG = TLO.DAG;
1076 unsigned ShiftOpc = Op.getOpcode();
1077 bool IsSigned = false;
 // Number of leading sign/zero bits the chosen interpretation can rely on.
 // It is only assigned on the switch paths that break out below; every other
 // path returns early. Note that this local shares its name with the
 // KnownBits type used elsewhere in this file.
1078 unsigned KnownBits;
1079 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1080 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
 // NumSigned is one less than the smaller of the two sign-bit counts.
 // NOTE(review): presumably ComputeNumSignBits always reports at least one
 // bit, so this unsigned subtraction cannot wrap; confirm.
1081 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1082 unsigned NumZeroA =
1083 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1084 unsigned NumZeroB =
1085 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1086 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1087
 // Choose between the unsigned and signed averaging forms. In both cases the
 // unsigned form is tried first and is taken only when there are strictly
 // more known leading zeros than (reduced) sign bits.
1088 switch (ShiftOpc) {
1089 default:
1090 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1091 case ISD::SRA: {
1092 if (NumZero >= 2 && NumSigned < NumZero) {
1093 IsSigned = false;
1094 KnownBits = NumZero;
1095 break;
1096 }
1097 if (NumSigned >= 1) {
1098 IsSigned = true;
1099 KnownBits = NumSigned;
1100 break;
1101 }
1102 return SDValue();
1103 }
1104 case ISD::SRL: {
1105 if (NumZero >= 1 && NumSigned < NumZero) {
1106 IsSigned = false;
1107 KnownBits = NumZero;
1108 break;
1109 }
 // For a logical shift the signed form is taken only when the result's
 // sign bit is not demanded.
1110 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1111 IsSigned = true;
1112 KnownBits = NumSigned;
1113 break;
1114 }
1115 return SDValue();
1116 }
1117 }
1118
1119 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1120 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1121
1122 // Find the smallest power-2 type that is legal for this vector size and
1123 // operation, given the original type size and the number of known sign/zero
1124 // bits.
1125 EVT VT = Op.getValueType();
 // Never go below 8 bits; bit_ceil then rounds the width up to a power of two.
1126 unsigned MinWidth =
1127 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1128 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
 // NOTE(review): the condition guarding the following return (listing line
 // 1129) is absent from this listing.
1130 return SDValue();
1131 if (VT.isVector())
1132 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1133 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
1134 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1135 // larger type size to do the transform.
1136 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
1137 return SDValue();
 // Fall back to the original type only when the outer add, and the inner add
 // if one was matched, are known not to overflow under the chosen signedness.
1138 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1139 Add.getOperand(1)) &&
1140 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1141 Add2.getOperand(1))))
1142 NVT = VT;
1143 else
1144 return SDValue();
1145 }
1146
1147 // Don't create an AVGFLOOR node with a scalar constant unless it's legal as
1148 // this is likely to stop other folds (reassociation, value tracking etc.)
1149 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
1150 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
1151 return SDValue();
1152
 // Build the averaging node in NVT from the operands extended or truncated
 // to NVT, then extend or truncate the result back to the original type VT.
1153 SDLoc DL(Op);
1154 SDValue ResultAVG =
1155 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1156 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1157 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1158}
1159
1160/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1161/// result of Op are ever used downstream. If we can use this information to
1162/// simplify Op, create a new simplified DAG node and return true, returning the
1163/// original and new nodes in Old and New. Otherwise, analyze the expression and
1164/// return a mask of Known bits for the expression (used to simplify the
1165/// caller). The Known bits may only be accurate for those bits in the
1166/// OriginalDemandedBits and OriginalDemandedElts.
1168 SDValue Op, const APInt &OriginalDemandedBits,
1169 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1170 unsigned Depth, bool AssumeSingleUse) const {
1171 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1172 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1173 "Mask size mismatches value type size!");
1174
1175 // Don't know anything.
1176 Known = KnownBits(BitWidth);
1177
1178 EVT VT = Op.getValueType();
1179 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1180 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1181 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1182 "Unexpected vector size");
1183
1184 APInt DemandedBits = OriginalDemandedBits;
1185 APInt DemandedElts = OriginalDemandedElts;
1186 SDLoc dl(Op);
1187
1188 // Undef operand.
1189 if (Op.isUndef())
1190 return false;
1191
1192 // We can't simplify target constants.
1193 if (Op.getOpcode() == ISD::TargetConstant)
1194 return false;
1195
1196 if (Op.getOpcode() == ISD::Constant) {
1197 // We know all of the bits for a constant!
1198 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1199 return false;
1200 }
1201
1202 if (Op.getOpcode() == ISD::ConstantFP) {
1203 // We know all of the bits for a floating point constant!
1205 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1206 return false;
1207 }
1208
1209 // Other users may use these bits.
1210 bool HasMultiUse = false;
1211 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1213 // Limit search depth.
1214 return false;
1215 }
1216 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1218 DemandedElts = APInt::getAllOnes(NumElts);
1219 HasMultiUse = true;
1220 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1221 // Not demanding any bits/elts from Op.
1222 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1223 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1224 // Limit search depth.
1225 return false;
1226 }
1227
1228 KnownBits Known2;
1229 switch (Op.getOpcode()) {
1230 case ISD::SCALAR_TO_VECTOR: {
1231 if (VT.isScalableVector())
1232 return false;
1233 if (!DemandedElts[0])
1234 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1235
1236 KnownBits SrcKnown;
1237 SDValue Src = Op.getOperand(0);
1238 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1239 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1240 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1241 return true;
1242
1243 // Upper elements are undef, so only get the knownbits if we just demand
1244 // the bottom element.
1245 if (DemandedElts == 1)
1246 Known = SrcKnown.anyextOrTrunc(BitWidth);
1247 break;
1248 }
1249 case ISD::BUILD_VECTOR:
1250 // Collect the known bits that are shared by every demanded element.
1251 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1252 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1253 return false; // Don't fall through, will infinitely loop.
1254 case ISD::SPLAT_VECTOR: {
1255 SDValue Scl = Op.getOperand(0);
1256 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1257 KnownBits KnownScl;
1258 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1259 return true;
1260
1261 // Implicitly truncate the bits to match the official semantics of
1262 // SPLAT_VECTOR.
1263 Known = KnownScl.trunc(BitWidth);
1264 break;
1265 }
1266 case ISD::FREEZE: {
1267 SDValue N0 = Op.getOperand(0);
1269 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
1270 return TLO.CombineTo(Op, N0);
1271 break;
1272 }
1273 case ISD::LOAD: {
1274 auto *LD = cast<LoadSDNode>(Op);
1275 if (getTargetConstantFromLoad(LD)) {
1276 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1277 return false; // Don't fall through, will infinitely loop.
1278 }
1279 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1280 // If this is a ZEXTLoad and we are looking at the loaded value.
1281 EVT MemVT = LD->getMemoryVT();
1282 unsigned MemBits = MemVT.getScalarSizeInBits();
1283 Known.Zero.setBitsFrom(MemBits);
1284 return false; // Don't fall through, will infinitely loop.
1285 }
1286 break;
1287 }
1289 if (VT.isScalableVector())
1290 return false;
1291 SDValue Vec = Op.getOperand(0);
1292 SDValue Scl = Op.getOperand(1);
1293 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1294 EVT VecVT = Vec.getValueType();
1295
1296 // If index isn't constant, assume we need all vector elements AND the
1297 // inserted element.
1298 APInt DemandedVecElts(DemandedElts);
1299 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1300 unsigned Idx = CIdx->getZExtValue();
1301 DemandedVecElts.clearBit(Idx);
1302
1303 // Inserted element is not required.
1304 if (!DemandedElts[Idx])
1305 return TLO.CombineTo(Op, Vec);
1306 }
1307
1308 KnownBits KnownScl;
1309 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1310 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1311 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1312 return true;
1313
1314 Known = KnownScl.anyextOrTrunc(BitWidth);
1315
1316 KnownBits KnownVec;
1317 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1318 Depth + 1))
1319 return true;
1320
1321 if (!!DemandedVecElts)
1322 Known = Known.intersectWith(KnownVec);
1323
1324 return false;
1325 }
1326 case ISD::INSERT_SUBVECTOR: {
1327 if (VT.isScalableVector())
1328 return false;
1329 // Demand any elements from the subvector and the remainder from the src it's
1330 // inserted into.
1331 SDValue Src = Op.getOperand(0);
1332 SDValue Sub = Op.getOperand(1);
1333 uint64_t Idx = Op.getConstantOperandVal(2);
1334 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1335 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1336 APInt DemandedSrcElts = DemandedElts;
1337 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
1338
1339 KnownBits KnownSub, KnownSrc;
1340 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1341 Depth + 1))
1342 return true;
1343 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1344 Depth + 1))
1345 return true;
1346
1347 Known.setAllConflict();
1348 if (!!DemandedSubElts)
1349 Known = Known.intersectWith(KnownSub);
1350 if (!!DemandedSrcElts)
1351 Known = Known.intersectWith(KnownSrc);
1352
1353 // Attempt to avoid multi-use src if we don't need anything from it.
1354 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1355 !DemandedSrcElts.isAllOnes()) {
1357 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1359 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1360 if (NewSub || NewSrc) {
1361 NewSub = NewSub ? NewSub : Sub;
1362 NewSrc = NewSrc ? NewSrc : Src;
1363 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1364 Op.getOperand(2));
1365 return TLO.CombineTo(Op, NewOp);
1366 }
1367 }
1368 break;
1369 }
1371 if (VT.isScalableVector())
1372 return false;
1373 // Offset the demanded elts by the subvector index.
1374 SDValue Src = Op.getOperand(0);
1375 if (Src.getValueType().isScalableVector())
1376 break;
1377 uint64_t Idx = Op.getConstantOperandVal(1);
1378 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1379 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1380
1381 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1382 Depth + 1))
1383 return true;
1384
1385 // Attempt to avoid multi-use src if we don't need anything from it.
1386 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1388 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1389 if (DemandedSrc) {
1390 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1391 Op.getOperand(1));
1392 return TLO.CombineTo(Op, NewOp);
1393 }
1394 }
1395 break;
1396 }
1397 case ISD::CONCAT_VECTORS: {
1398 if (VT.isScalableVector())
1399 return false;
1400 Known.setAllConflict();
1401 EVT SubVT = Op.getOperand(0).getValueType();
1402 unsigned NumSubVecs = Op.getNumOperands();
1403 unsigned NumSubElts = SubVT.getVectorNumElements();
1404 for (unsigned i = 0; i != NumSubVecs; ++i) {
1405 APInt DemandedSubElts =
1406 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1407 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1408 Known2, TLO, Depth + 1))
1409 return true;
1410 // Known bits are shared by every demanded subvector element.
1411 if (!!DemandedSubElts)
1412 Known = Known.intersectWith(Known2);
1413 }
1414 break;
1415 }
1416 case ISD::VECTOR_SHUFFLE: {
1417 assert(!VT.isScalableVector());
1418 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1419
1420 // Collect demanded elements from shuffle operands.
1421 APInt DemandedLHS, DemandedRHS;
1422 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1423 DemandedRHS))
1424 break;
1425
1426 if (!!DemandedLHS || !!DemandedRHS) {
1427 SDValue Op0 = Op.getOperand(0);
1428 SDValue Op1 = Op.getOperand(1);
1429
1430 Known.setAllConflict();
1431 if (!!DemandedLHS) {
1432 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1433 Depth + 1))
1434 return true;
1435 Known = Known.intersectWith(Known2);
1436 }
1437 if (!!DemandedRHS) {
1438 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1439 Depth + 1))
1440 return true;
1441 Known = Known.intersectWith(Known2);
1442 }
1443
1444 // Attempt to avoid multi-use ops if we don't need anything from them.
1446 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1448 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1449 if (DemandedOp0 || DemandedOp1) {
1450 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1451 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1452 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1453 return TLO.CombineTo(Op, NewOp);
1454 }
1455 }
1456 break;
1457 }
1458 case ISD::AND: {
1459 SDValue Op0 = Op.getOperand(0);
1460 SDValue Op1 = Op.getOperand(1);
1461
1462 // If the RHS is a constant, check to see if the LHS would be zero without
1463 // using the bits from the RHS. Below, we use knowledge about the RHS to
1464 // simplify the LHS, here we're using information from the LHS to simplify
1465 // the RHS.
1466 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1467 // Do not increment Depth here; that can cause an infinite loop.
1468 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1469 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1470 if ((LHSKnown.Zero & DemandedBits) ==
1471 (~RHSC->getAPIntValue() & DemandedBits))
1472 return TLO.CombineTo(Op, Op0);
1473
1474 // If any of the set bits in the RHS are known zero on the LHS, shrink
1475 // the constant.
1476 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1477 DemandedElts, TLO))
1478 return true;
1479
1480 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1481 // constant, but if this 'and' is only clearing bits that were just set by
1482 // the xor, then this 'and' can be eliminated by shrinking the mask of
1483 // the xor. For example, for a 32-bit X:
1484 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1485 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1486 LHSKnown.One == ~RHSC->getAPIntValue()) {
1487 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1488 return TLO.CombineTo(Op, Xor);
1489 }
1490 }
1491
1492 // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 (or zero).
1493 SDValue X, Y;
1494 if (sd_match(Op,
1495 m_And(m_Value(Y),
1497 m_Sub(m_Value(X), m_Deferred(Y)))))) &&
1498 TLO.DAG.isKnownToBeAPowerOfTwo(Y, DemandedElts, /*OrZero=*/true)) {
1499 return TLO.CombineTo(
1500 Op, TLO.DAG.getNode(ISD::AND, dl, VT, TLO.DAG.getNOT(dl, X, VT), Y));
1501 }
1502
1503 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1504 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1505 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1506 (Op0.getOperand(0).isUndef() ||
1508 Op0->hasOneUse()) {
1509 unsigned NumSubElts =
1511 unsigned SubIdx = Op0.getConstantOperandVal(2);
1512 APInt DemandedSub =
1513 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1514 KnownBits KnownSubMask =
1515 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1516 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1517 SDValue NewAnd =
1518 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1519 SDValue NewInsert =
1520 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1521 Op0.getOperand(1), Op0.getOperand(2));
1522 return TLO.CombineTo(Op, NewInsert);
1523 }
1524 }
1525
1526 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1527 Depth + 1))
1528 return true;
1529 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1530 Known2, TLO, Depth + 1))
1531 return true;
1532
1533 // If all of the demanded bits are known one on one side, return the other.
1534 // These bits cannot contribute to the result of the 'and'.
1535 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1536 return TLO.CombineTo(Op, Op0);
1537 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1538 return TLO.CombineTo(Op, Op1);
1539 // If all of the demanded bits in the inputs are known zeros, return zero.
1540 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1541 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1542 // If the RHS is a constant, see if we can simplify it.
1543 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1544 TLO))
1545 return true;
1546 // If the operation can be done in a smaller type, do so.
1548 return true;
1549
1550 // Attempt to avoid multi-use ops if we don't need anything from them.
1551 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1553 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1555 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1556 if (DemandedOp0 || DemandedOp1) {
1557 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1558 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1559 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1560 return TLO.CombineTo(Op, NewOp);
1561 }
1562 }
1563
1564 Known &= Known2;
1565 break;
1566 }
1567 case ISD::OR: {
1568 SDValue Op0 = Op.getOperand(0);
1569 SDValue Op1 = Op.getOperand(1);
1570 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1571 Depth + 1)) {
1572 Op->dropFlags(SDNodeFlags::Disjoint);
1573 return true;
1574 }
1575
1576 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1577 Known2, TLO, Depth + 1)) {
1578 Op->dropFlags(SDNodeFlags::Disjoint);
1579 return true;
1580 }
1581
1582 // If all of the demanded bits are known zero on one side, return the other.
1583 // These bits cannot contribute to the result of the 'or'.
1584 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1585 return TLO.CombineTo(Op, Op0);
1586 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1587 return TLO.CombineTo(Op, Op1);
1588 // If the RHS is a constant, see if we can simplify it.
1589 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1590 return true;
1591 // If the operation can be done in a smaller type, do so.
1593 return true;
1594
1595 // Attempt to avoid multi-use ops if we don't need anything from them.
1596 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1598 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1600 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1601 if (DemandedOp0 || DemandedOp1) {
1602 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1603 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1604 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1605 return TLO.CombineTo(Op, NewOp);
1606 }
1607 }
1608
1609 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1610 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1611 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1612 Op0->hasOneUse() && Op1->hasOneUse()) {
1613 // Attempt to match all commutations - m_c_Or would've been useful!
1614 for (int I = 0; I != 2; ++I) {
1615 SDValue X = Op.getOperand(I).getOperand(0);
1616 SDValue C1 = Op.getOperand(I).getOperand(1);
1617 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1618 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1619 if (Alt.getOpcode() == ISD::OR) {
1620 for (int J = 0; J != 2; ++J) {
1621 if (X == Alt.getOperand(J)) {
1622 SDValue Y = Alt.getOperand(1 - J);
1623 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1624 {C1, C2})) {
1625 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1626 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1627 return TLO.CombineTo(
1628 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1629 }
1630 }
1631 }
1632 }
1633 }
1634 }
1635
1636 Known |= Known2;
1637 break;
1638 }
1639 case ISD::XOR: {
1640 SDValue Op0 = Op.getOperand(0);
1641 SDValue Op1 = Op.getOperand(1);
1642
1643 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1644 Depth + 1))
1645 return true;
1646 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1647 Depth + 1))
1648 return true;
1649
1650 // If all of the demanded bits are known zero on one side, return the other.
1651 // These bits cannot contribute to the result of the 'xor'.
1652 if (DemandedBits.isSubsetOf(Known.Zero))
1653 return TLO.CombineTo(Op, Op0);
1654 if (DemandedBits.isSubsetOf(Known2.Zero))
1655 return TLO.CombineTo(Op, Op1);
1656 // If the operation can be done in a smaller type, do so.
1658 return true;
1659
1660 // If all of the unknown bits are known to be zero on one side or the other
1661 // turn this into an *inclusive* or.
1662 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1663 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1664 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1665
1666 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1667 if (C) {
1668 // If one side is a constant, and all of the set bits in the constant are
1669 // also known set on the other side, turn this into an AND, as we know
1670 // the bits will be cleared.
1671 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1672 // NB: it is okay if more bits are known than are requested
1673 if (C->getAPIntValue() == Known2.One) {
1674 SDValue ANDC =
1675 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1676 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1677 }
1678
1679 // If the RHS is a constant, see if we can change it. Don't alter a -1
1680 // constant because that's a 'not' op, and that is better for combining
1681 // and codegen.
1682 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1683 // We're flipping all demanded bits. Flip the undemanded bits too.
1684 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1685 return TLO.CombineTo(Op, New);
1686 }
1687
1688 unsigned Op0Opcode = Op0.getOpcode();
1689 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1690 if (ConstantSDNode *ShiftC =
1691 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1692 // Don't crash on an oversized shift. We can not guarantee that a
1693 // bogus shift has been simplified to undef.
1694 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1695 uint64_t ShiftAmt = ShiftC->getZExtValue();
1697 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1698 : Ones.lshr(ShiftAmt);
1699 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1701 // If the xor constant is a demanded mask, do a 'not' before the
1702 // shift:
1703 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1704 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1705 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1706 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1707 Op0.getOperand(1)));
1708 }
1709 }
1710 }
1711 }
1712 }
1713
1714 // If we can't turn this into a 'not', try to shrink the constant.
1715 if (!C || !C->isAllOnes())
1716 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1717 return true;
1718
1719 // Attempt to avoid multi-use ops if we don't need anything from them.
1720 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1722 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1724 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1725 if (DemandedOp0 || DemandedOp1) {
1726 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1727 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1728 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1729 return TLO.CombineTo(Op, NewOp);
1730 }
1731 }
1732
1733 Known ^= Known2;
1734 break;
1735 }
1736 case ISD::SELECT:
1737 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1738 Known, TLO, Depth + 1))
1739 return true;
1740 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1741 Known2, TLO, Depth + 1))
1742 return true;
1743
1744 // If the operands are constants, see if we can simplify them.
1745 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1746 return true;
1747
1748 // Only known if known in both the LHS and RHS.
1749 Known = Known.intersectWith(Known2);
1750 break;
1751 case ISD::VSELECT:
1752 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1753 Known, TLO, Depth + 1))
1754 return true;
1755 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1756 Known2, TLO, Depth + 1))
1757 return true;
1758
1759 // Only known if known in both the LHS and RHS.
1760 Known = Known.intersectWith(Known2);
1761 break;
1762 case ISD::SELECT_CC:
1763 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1764 Known, TLO, Depth + 1))
1765 return true;
1766 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1767 Known2, TLO, Depth + 1))
1768 return true;
1769
1770 // If the operands are constants, see if we can simplify them.
1771 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1772 return true;
1773
1774 // Only known if known in both the LHS and RHS.
1775 Known = Known.intersectWith(Known2);
1776 break;
1777 case ISD::SETCC: {
1778 SDValue Op0 = Op.getOperand(0);
1779 SDValue Op1 = Op.getOperand(1);
1780 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1781 // If we're testing X < 0, X >= 0, X <= -1 or X > -1
1782 // (X is of integer type) then we only need the sign mask of the previous
1783 // result
1784 if (Op1.getValueType().isInteger() &&
1785 (((CC == ISD::SETLT || CC == ISD::SETGE) && isNullOrNullSplat(Op1)) ||
1786 ((CC == ISD::SETLE || CC == ISD::SETGT) &&
1787 isAllOnesOrAllOnesSplat(Op1)))) {
1788 KnownBits KnownOp0;
1791 DemandedElts, KnownOp0, TLO, Depth + 1))
1792 return true;
1793 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1794 // width as the setcc result, and (3) the result of a setcc conforms to 0
1795 // or -1, we may be able to bypass the setcc.
1796 if (DemandedBits.isSignMask() &&
1800 // If we remove a >= 0 or > -1 (for integers), we need to introduce a
1801 // NOT Operation
1802 if (CC == ISD::SETGE || CC == ISD::SETGT) {
1803 SDLoc DL(Op);
1804 EVT VT = Op0.getValueType();
1805 SDValue NotOp0 = TLO.DAG.getNOT(DL, Op0, VT);
1806 return TLO.CombineTo(Op, NotOp0);
1807 }
1808 return TLO.CombineTo(Op, Op0);
1809 }
1810 }
1811 if (getBooleanContents(Op0.getValueType()) ==
1813 BitWidth > 1)
1814 Known.Zero.setBitsFrom(1);
1815 break;
1816 }
1817 case ISD::SHL: {
1818 SDValue Op0 = Op.getOperand(0);
1819 SDValue Op1 = Op.getOperand(1);
1820 EVT ShiftVT = Op1.getValueType();
1821
1822 if (std::optional<unsigned> KnownSA =
1823 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1824 unsigned ShAmt = *KnownSA;
1825 if (ShAmt == 0)
1826 return TLO.CombineTo(Op, Op0);
1827
1828 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1829 // single shift. We can do this if the bottom bits (which are shifted
1830 // out) are never demanded.
1831 // TODO - support non-uniform vector amounts.
1832 if (Op0.getOpcode() == ISD::SRL) {
1833 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1834 if (std::optional<unsigned> InnerSA =
1835 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1836 unsigned C1 = *InnerSA;
1837 unsigned Opc = ISD::SHL;
1838 int Diff = ShAmt - C1;
1839 if (Diff < 0) {
1840 Diff = -Diff;
1841 Opc = ISD::SRL;
1842 }
1843 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1844 return TLO.CombineTo(
1845 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1846 }
1847 }
1848 }
1849
1850 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1851 // are not demanded. This will likely allow the anyext to be folded away.
1852 // TODO - support non-uniform vector amounts.
1853 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1854 SDValue InnerOp = Op0.getOperand(0);
1855 EVT InnerVT = InnerOp.getValueType();
1856 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1857 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1858 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1859 SDValue NarrowShl = TLO.DAG.getNode(
1860 ISD::SHL, dl, InnerVT, InnerOp,
1861 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1862 return TLO.CombineTo(
1863 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1864 }
1865
1866 // Repeat the SHL optimization above in cases where an extension
1867 // intervenes: (shl (anyext (shr x, c1)), c2) to
1868 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1869 // aren't demanded (as above) and that the shifted upper c1 bits of
1870 // x aren't demanded.
1871 // TODO - support non-uniform vector amounts.
1872 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1873 InnerOp.hasOneUse()) {
1874 if (std::optional<unsigned> SA2 = TLO.DAG.getValidShiftAmount(
1875 InnerOp, DemandedElts, Depth + 2)) {
1876 unsigned InnerShAmt = *SA2;
1877 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1878 DemandedBits.getActiveBits() <=
1879 (InnerBits - InnerShAmt + ShAmt) &&
1880 DemandedBits.countr_zero() >= ShAmt) {
1881 SDValue NewSA =
1882 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1883 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1884 InnerOp.getOperand(0));
1885 return TLO.CombineTo(
1886 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1887 }
1888 }
1889 }
1890 }
1891
1892 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1893 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1894 Depth + 1)) {
1895 // Disable the nsw and nuw flags. We can no longer guarantee that we
1896 // won't wrap after simplification.
1897 Op->dropFlags(SDNodeFlags::NoWrap);
1898 return true;
1899 }
1900 Known <<= ShAmt;
1901 // low bits known zero.
1902 Known.Zero.setLowBits(ShAmt);
1903
1904 // Attempt to avoid multi-use ops if we don't need anything from them.
1905 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1907 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1908 if (DemandedOp0) {
1909 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1910 return TLO.CombineTo(Op, NewOp);
1911 }
1912 }
1913
1914 // TODO: Can we merge this fold with the one below?
1915 // Try shrinking the operation as long as the shift amount will still be
1916 // in range.
1917 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1918 Op.getNode()->hasOneUse()) {
1919 // Search for the smallest integer type with free casts to and from
1920 // Op's type. For expedience, just check power-of-2 integer types.
1921 unsigned DemandedSize = DemandedBits.getActiveBits();
1922 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1923 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1924 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1925 if (isNarrowingProfitable(Op.getNode(), VT, SmallVT) &&
1926 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1927 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1928 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1929 assert(DemandedSize <= SmallVTBits &&
1930 "Narrowed below demanded bits?");
1931 // We found a type with free casts.
1932 SDValue NarrowShl = TLO.DAG.getNode(
1933 ISD::SHL, dl, SmallVT,
1934 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1935 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1936 return TLO.CombineTo(
1937 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1938 }
1939 }
1940 }
1941
1942 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1943 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1944 // Only do this if we demand the upper half so the knownbits are correct.
1945 unsigned HalfWidth = BitWidth / 2;
1946 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1947 DemandedBits.countLeadingOnes() >= HalfWidth) {
1948 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1949 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
1950 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1951 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1952 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1953 // If we're demanding the upper bits at all, we must ensure
1954 // that the upper bits of the shift result are known to be zero,
1955 // which is equivalent to the narrow shift being NUW.
1956 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1957 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1958 SDNodeFlags Flags;
1959 Flags.setNoSignedWrap(IsNSW);
1960 Flags.setNoUnsignedWrap(IsNUW);
1961 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1962 SDValue NewShiftAmt =
1963 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1964 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1965 NewShiftAmt, Flags);
1966 SDValue NewExt =
1967 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1968 return TLO.CombineTo(Op, NewExt);
1969 }
1970 }
1971 }
1972 } else {
1973 // This is a variable shift, so we can't shift the demand mask by a known
1974 // amount. But if we are not demanding high bits, then we are not
1975 // demanding those bits from the pre-shifted operand either.
1976 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1977 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1978 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1979 Depth + 1)) {
1980 // Disable the nsw and nuw flags. We can no longer guarantee that we
1981 // won't wrap after simplification.
1982 Op->dropFlags(SDNodeFlags::NoWrap);
1983 return true;
1984 }
1985 Known.resetAll();
1986 }
1987 }
1988
1989 // If we are only demanding sign bits then we can use the shift source
1990 // directly.
1991 if (std::optional<unsigned> MaxSA =
1992 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1993 unsigned ShAmt = *MaxSA;
1994 unsigned NumSignBits =
1995 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1996 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1997 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1998 return TLO.CombineTo(Op, Op0);
1999 }
2000 break;
2001 }
2002 case ISD::SRL: {
2003 SDValue Op0 = Op.getOperand(0);
2004 SDValue Op1 = Op.getOperand(1);
2005 EVT ShiftVT = Op1.getValueType();
2006
2007 if (std::optional<unsigned> KnownSA =
2008 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2009 unsigned ShAmt = *KnownSA;
2010 if (ShAmt == 0)
2011 return TLO.CombineTo(Op, Op0);
2012
2013 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
2014 // single shift. We can do this if the top bits (which are shifted out)
2015 // are never demanded.
2016 // TODO - support non-uniform vector amounts.
2017 if (Op0.getOpcode() == ISD::SHL) {
2018 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2019 if (std::optional<unsigned> InnerSA =
2020 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2021 unsigned C1 = *InnerSA;
2022 unsigned Opc = ISD::SRL;
2023 int Diff = ShAmt - C1;
2024 if (Diff < 0) {
2025 Diff = -Diff;
2026 Opc = ISD::SHL;
2027 }
2028 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
2029 return TLO.CombineTo(
2030 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
2031 }
2032 }
2033 }
2034
2035 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
2036 // single sra. We can do this if the top bits are never demanded.
2037 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
2038 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
2039 if (std::optional<unsigned> InnerSA =
2040 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2041 unsigned C1 = *InnerSA;
2042 // Clamp the combined shift amount if it exceeds the bit width.
2043 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
2044 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
2045 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
2046 Op0.getOperand(0), NewSA));
2047 }
2048 }
2049 }
2050
2051 APInt InDemandedMask = (DemandedBits << ShAmt);
2052
2053 // If the shift is exact, then it does demand the low bits (and knows that
2054 // they are zero).
2055 if (Op->getFlags().hasExact())
2056 InDemandedMask.setLowBits(ShAmt);
2057
2058 // Narrow shift to lower half - similar to ShrinkDemandedOp.
2059 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
2060 if ((BitWidth % 2) == 0 && !VT.isVector()) {
2062 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
2063 if (isNarrowingProfitable(Op.getNode(), VT, HalfVT) &&
2064 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
2065 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
2066 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
2067 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
2068 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
2069 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
2070 SDValue NewShiftAmt =
2071 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
2072 SDValue NewShift =
2073 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
2074 return TLO.CombineTo(
2075 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2076 }
2077 }
2078
2079 // Compute the new bits that are at the top now.
2080 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2081 Depth + 1))
2082 return true;
2083 Known >>= ShAmt;
2084 // High bits known zero.
2085 Known.Zero.setHighBits(ShAmt);
2086
2087 // Attempt to avoid multi-use ops if we don't need anything from them.
2088 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2090 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2091 if (DemandedOp0) {
2092 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2093 return TLO.CombineTo(Op, NewOp);
2094 }
2095 }
2096 } else {
2097 // Use generic knownbits computation as it has support for non-uniform
2098 // shift amounts.
2099 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2100 }
2101
2102 // If we are only demanding sign bits then we can use the shift source
2103 // directly.
2104 if (std::optional<unsigned> MaxSA =
2105 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
2106 unsigned ShAmt = *MaxSA;
2107 // Must already be signbits in DemandedBits bounds, and can't demand any
2108 // shifted in zeroes.
2109 if (DemandedBits.countl_zero() >= ShAmt) {
2110 unsigned NumSignBits =
2111 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
2112 if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
2113 return TLO.CombineTo(Op, Op0);
2114 }
2115 }
2116
2117 // Try to match AVG patterns (after shift simplification).
2118 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2119 DemandedElts, Depth + 1))
2120 return TLO.CombineTo(Op, AVG);
2121
2122 break;
2123 }
2124 case ISD::SRA: {
2125 SDValue Op0 = Op.getOperand(0);
2126 SDValue Op1 = Op.getOperand(1);
2127 EVT ShiftVT = Op1.getValueType();
2128
2129 // If we only want bits that already match the signbit then we don't need
2130 // to shift.
2131 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2132 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2133 NumHiDemandedBits)
2134 return TLO.CombineTo(Op, Op0);
2135
2136 // If this is an arithmetic shift right and only the low-bit is set, we can
2137 // always convert this into a logical shr, even if the shift amount is
2138 // variable. The low bit of the shift cannot be an input sign bit unless
2139 // the shift amount is >= the size of the datatype, which is undefined.
2140 if (DemandedBits.isOne())
2141 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2142
2143 if (std::optional<unsigned> KnownSA =
2144 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2145 unsigned ShAmt = *KnownSA;
2146 if (ShAmt == 0)
2147 return TLO.CombineTo(Op, Op0);
2148
2149 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2150 // supports sext_inreg.
2151 if (Op0.getOpcode() == ISD::SHL) {
2152 if (std::optional<unsigned> InnerSA =
2153 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2154 unsigned LowBits = BitWidth - ShAmt;
2155 EVT ExtVT = VT.changeElementType(
2156 *TLO.DAG.getContext(),
2157 EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits));
2158
2159 if (*InnerSA == ShAmt) {
2160 if (!TLO.LegalOperations() ||
2162 return TLO.CombineTo(
2163 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2164 Op0.getOperand(0),
2165 TLO.DAG.getValueType(ExtVT)));
2166
2167 // Even if we can't convert to sext_inreg, we might be able to
2168 // remove this shift pair if the input is already sign extended.
2169 unsigned NumSignBits =
2170 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2171 if (NumSignBits > ShAmt)
2172 return TLO.CombineTo(Op, Op0.getOperand(0));
2173 }
2174 }
2175 }
2176
2177 APInt InDemandedMask = (DemandedBits << ShAmt);
2178
2179 // If the shift is exact, then it does demand the low bits (and knows that
2180 // they are zero).
2181 if (Op->getFlags().hasExact())
2182 InDemandedMask.setLowBits(ShAmt);
2183
2184 // If any of the demanded bits are produced by the sign extension, we also
2185 // demand the input sign bit.
2186 if (DemandedBits.countl_zero() < ShAmt)
2187 InDemandedMask.setSignBit();
2188
2189 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2190 Depth + 1))
2191 return true;
2192 Known >>= ShAmt;
2193
2194 // If the input sign bit is known to be zero, or if none of the top bits
2195 // are demanded, turn this into an unsigned shift right.
2196 if (Known.Zero[BitWidth - ShAmt - 1] ||
2197 DemandedBits.countl_zero() >= ShAmt) {
2198 SDNodeFlags Flags;
2199 Flags.setExact(Op->getFlags().hasExact());
2200 return TLO.CombineTo(
2201 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2202 }
2203
2204 int Log2 = DemandedBits.exactLogBase2();
2205 if (Log2 >= 0) {
2206 // The bit must come from the sign.
2207 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2208 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2209 }
2210
2211 if (Known.One[BitWidth - ShAmt - 1])
2212 // New bits are known one.
2213 Known.One.setHighBits(ShAmt);
2214
2215 // Attempt to avoid multi-use ops if we don't need anything from them.
2216 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2218 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2219 if (DemandedOp0) {
2220 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2221 return TLO.CombineTo(Op, NewOp);
2222 }
2223 }
2224 }
2225
2226 // Try to match AVG patterns (after shift simplification).
2227 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2228 DemandedElts, Depth + 1))
2229 return TLO.CombineTo(Op, AVG);
2230
2231 break;
2232 }
2233 case ISD::FSHL:
2234 case ISD::FSHR: {
2235 SDValue Op0 = Op.getOperand(0);
2236 SDValue Op1 = Op.getOperand(1);
2237 SDValue Op2 = Op.getOperand(2);
2238 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2239
2240 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2241 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2242
2243 // For fshl, 0-shift returns the 1st arg.
2244 // For fshr, 0-shift returns the 2nd arg.
2245 if (Amt == 0) {
2246 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2247 Known, TLO, Depth + 1))
2248 return true;
2249 break;
2250 }
2251
2252 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2253 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2254 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2255 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2256 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2257 Depth + 1))
2258 return true;
2259 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2260 Depth + 1))
2261 return true;
2262
2263 Known2 <<= (IsFSHL ? Amt : (BitWidth - Amt));
2264 Known >>= (IsFSHL ? (BitWidth - Amt) : Amt);
2265 Known = Known.unionWith(Known2);
2266
2267 // Attempt to avoid multi-use ops if we don't need anything from them.
2268 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2269 !DemandedElts.isAllOnes()) {
2271 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2273 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2274 if (DemandedOp0 || DemandedOp1) {
2275 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2276 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2277 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2278 DemandedOp1, Op2);
2279 return TLO.CombineTo(Op, NewOp);
2280 }
2281 }
2282 }
2283
2284 if (isPowerOf2_32(BitWidth)) {
2285 // Fold FSHR(Op0,Op1,Op2) -> SRL(Op1,Op2)
2286 // iff we're guaranteed not to use Op0.
2287 // TODO: Add FSHL equivalent?
2288 if (!IsFSHL && !DemandedBits.isAllOnes() &&
2289 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
2290 KnownBits KnownAmt =
2291 TLO.DAG.computeKnownBits(Op2, DemandedElts, Depth + 1);
2292 unsigned MaxShiftAmt =
2293 KnownAmt.getMaxValue().getLimitedValue(BitWidth - 1);
2294 // Check we don't demand any shifted bits outside Op1.
2295 if (DemandedBits.countl_zero() >= MaxShiftAmt) {
2296 EVT AmtVT = Op2.getValueType();
2297 SDValue NewAmt =
2298 TLO.DAG.getNode(ISD::AND, dl, AmtVT, Op2,
2299 TLO.DAG.getConstant(BitWidth - 1, dl, AmtVT));
2300 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, Op1, NewAmt);
2301 return TLO.CombineTo(Op, NewOp);
2302 }
2303 }
2304
2305 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2306 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2307 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
2308 Depth + 1))
2309 return true;
2310 }
2311 break;
2312 }
2313 case ISD::ROTL:
2314 case ISD::ROTR: {
2315 SDValue Op0 = Op.getOperand(0);
2316 SDValue Op1 = Op.getOperand(1);
2317 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2318
2319 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2320 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2321 return TLO.CombineTo(Op, Op0);
2322
2323 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2324 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2325 unsigned RevAmt = BitWidth - Amt;
2326
2327 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2328 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2329 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2330 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2331 Depth + 1))
2332 return true;
2333
2334 // rot*(x, 0) --> x
2335 if (Amt == 0)
2336 return TLO.CombineTo(Op, Op0);
2337
2338 // See if we don't demand either half of the rotated bits.
2339 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2340 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2341 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2342 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2343 }
2344 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2345 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2346 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2347 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2348 }
2349 }
2350
2351 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2352 if (isPowerOf2_32(BitWidth)) {
2353 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2354 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2355 Depth + 1))
2356 return true;
2357 }
2358 break;
2359 }
2360 case ISD::SMIN:
2361 case ISD::SMAX:
2362 case ISD::UMIN:
2363 case ISD::UMAX: {
2364 unsigned Opc = Op.getOpcode();
2365 SDValue Op0 = Op.getOperand(0);
2366 SDValue Op1 = Op.getOperand(1);
2367
2368 // If we're only demanding signbits, then we can simplify to OR/AND node.
2369 unsigned BitOp =
2370 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2371 unsigned NumSignBits =
2372 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2373 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2374 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2375 if (NumSignBits >= NumDemandedUpperBits)
2376 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2377
2378 // Check if one arg is always less/greater than (or equal) to the other arg.
2379 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2380 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2381 switch (Opc) {
2382 case ISD::SMIN:
2383 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2384 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2385 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2386 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2387 Known = KnownBits::smin(Known0, Known1);
2388 break;
2389 case ISD::SMAX:
2390 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2391 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2392 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2393 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2394 Known = KnownBits::smax(Known0, Known1);
2395 break;
2396 case ISD::UMIN:
2397 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2398 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2399 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2400 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2401 Known = KnownBits::umin(Known0, Known1);
2402 break;
2403 case ISD::UMAX:
2404 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2405 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2406 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2407 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2408 Known = KnownBits::umax(Known0, Known1);
2409 break;
2410 }
2411 break;
2412 }
2413 case ISD::BITREVERSE: {
2414 SDValue Src = Op.getOperand(0);
2415 APInt DemandedSrcBits = DemandedBits.reverseBits();
2416 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2417 Depth + 1))
2418 return true;
2419 Known = Known2.reverseBits();
2420 break;
2421 }
2422 case ISD::BSWAP: {
2423 SDValue Src = Op.getOperand(0);
2424
2425 // If the only bits demanded come from one byte of the bswap result,
2426 // just shift the input byte into position to eliminate the bswap.
2427 unsigned NLZ = DemandedBits.countl_zero();
2428 unsigned NTZ = DemandedBits.countr_zero();
2429
2430 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2431 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2432 // have 14 leading zeros, round to 8.
2433 NLZ = alignDown(NLZ, 8);
2434 NTZ = alignDown(NTZ, 8);
2435 // If we need exactly one byte, we can do this transformation.
2436 if (BitWidth - NLZ - NTZ == 8) {
2437 // Replace this with either a left or right shift to get the byte into
2438 // the right place.
2439 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2440 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2441 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2442 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2443 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2444 return TLO.CombineTo(Op, NewOp);
2445 }
2446 }
2447
2448 APInt DemandedSrcBits = DemandedBits.byteSwap();
2449 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2450 Depth + 1))
2451 return true;
2452 Known = Known2.byteSwap();
2453 break;
2454 }
2455 case ISD::CTPOP: {
2456 // If only 1 bit is demanded, replace with PARITY as long as we're before
2457 // op legalization.
2458 // FIXME: Limit to scalars for now.
2459 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2460 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2461 Op.getOperand(0)));
2462
2463 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2464 break;
2465 }
2467 SDValue Op0 = Op.getOperand(0);
2468 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2469 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2470
2471 // If we only care about the highest bit, don't bother shifting right.
2472 if (DemandedBits.isSignMask()) {
2473 unsigned MinSignedBits =
2474 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2475 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2476 // However if the input is already sign extended we expect the sign
2477 // extension to be dropped altogether later and do not simplify.
2478 if (!AlreadySignExtended) {
2479 // Compute the correct shift amount type, which must be getShiftAmountTy
2480 // for scalar types after legalization.
2481 SDValue ShiftAmt =
2482 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2483 return TLO.CombineTo(Op,
2484 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2485 }
2486 }
2487
2488 // If none of the extended bits are demanded, eliminate the sextinreg.
2489 if (DemandedBits.getActiveBits() <= ExVTBits)
2490 return TLO.CombineTo(Op, Op0);
2491
2492 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2493
2494 // Since the sign extended bits are demanded, we know that the sign
2495 // bit is demanded.
2496 InputDemandedBits.setBit(ExVTBits - 1);
2497
2498 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2499 Depth + 1))
2500 return true;
2501
2502 // If the sign bit of the input is known set or clear, then we know the
2503 // top bits of the result.
2504
2505 // If the input sign bit is known zero, convert this into a zero extension.
2506 if (Known.Zero[ExVTBits - 1])
2507 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2508
2509 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2510 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2511 Known.One.setBitsFrom(ExVTBits);
2512 Known.Zero &= Mask;
2513 } else { // Input sign bit unknown
2514 Known.Zero &= Mask;
2515 Known.One &= Mask;
2516 }
2517 break;
2518 }
2519 case ISD::BUILD_PAIR: {
2520 EVT HalfVT = Op.getOperand(0).getValueType();
2521 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2522
2523 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2524 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2525
2526 KnownBits KnownLo, KnownHi;
2527
2528 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2529 return true;
2530
2531 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2532 return true;
2533
2534 Known = KnownHi.concat(KnownLo);
2535 break;
2536 }
2538 if (VT.isScalableVector())
2539 return false;
2540 [[fallthrough]];
2541 case ISD::ZERO_EXTEND: {
2542 SDValue Src = Op.getOperand(0);
2543 EVT SrcVT = Src.getValueType();
2544 unsigned InBits = SrcVT.getScalarSizeInBits();
2545 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2546 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2547
2548 // If none of the top bits are demanded, convert this into an any_extend.
2549 if (DemandedBits.getActiveBits() <= InBits) {
2550 // If we only need the non-extended bits of the bottom element
2551 // then we can just bitcast to the result.
2552 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2553 VT.getSizeInBits() == SrcVT.getSizeInBits())
2554 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2555
2556 unsigned Opc =
2558 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2559 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2560 }
2561
2562 APInt InDemandedBits = DemandedBits.trunc(InBits);
2563 APInt InDemandedElts = DemandedElts.zext(InElts);
2564 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2565 Depth + 1)) {
2566 Op->dropFlags(SDNodeFlags::NonNeg);
2567 return true;
2568 }
2569 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2570 Known = Known.zext(BitWidth);
2571
2572 // Attempt to avoid multi-use ops if we don't need anything from them.
2574 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2575 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2576 break;
2577 }
2579 if (VT.isScalableVector())
2580 return false;
2581 [[fallthrough]];
2582 case ISD::SIGN_EXTEND: {
2583 SDValue Src = Op.getOperand(0);
2584 EVT SrcVT = Src.getValueType();
2585 unsigned InBits = SrcVT.getScalarSizeInBits();
2586 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2587 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2588
2589 APInt InDemandedElts = DemandedElts.zext(InElts);
2590 APInt InDemandedBits = DemandedBits.trunc(InBits);
2591
2592 // Since some of the sign extended bits are demanded, we know that the sign
2593 // bit is demanded.
2594 InDemandedBits.setBit(InBits - 1);
2595
2596 // If none of the top bits are demanded, convert this into an any_extend.
2597 if (DemandedBits.getActiveBits() <= InBits) {
2598 // If we only need the non-extended bits of the bottom element
2599 // then we can just bitcast to the result.
2600 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2601 VT.getSizeInBits() == SrcVT.getSizeInBits())
2602 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2603
2604 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2606 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2607 InBits) {
2608 unsigned Opc =
2610 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2611 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2612 }
2613 }
2614
2615 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2616 Depth + 1))
2617 return true;
2618 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2619
2620 // If the sign bit is known one, the top bits match.
2621 Known = Known.sext(BitWidth);
2622
2623 // If the sign bit is known zero, convert this to a zero extend.
2624 if (Known.isNonNegative()) {
2625 unsigned Opc =
2627 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2628 SDNodeFlags Flags;
2629 if (!IsVecInReg)
2630 Flags |= SDNodeFlags::NonNeg;
2631 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2632 }
2633 }
2634
2635 // Attempt to avoid multi-use ops if we don't need anything from them.
2637 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2638 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2639 break;
2640 }
2642 if (VT.isScalableVector())
2643 return false;
2644 [[fallthrough]];
2645 case ISD::ANY_EXTEND: {
2646 SDValue Src = Op.getOperand(0);
2647 EVT SrcVT = Src.getValueType();
2648 unsigned InBits = SrcVT.getScalarSizeInBits();
2649 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2650 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2651
2652 // If we only need the bottom element then we can just bitcast.
2653 // TODO: Handle ANY_EXTEND?
2654 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2655 VT.getSizeInBits() == SrcVT.getSizeInBits())
2656 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2657
2658 APInt InDemandedBits = DemandedBits.trunc(InBits);
2659 APInt InDemandedElts = DemandedElts.zext(InElts);
2660 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2661 Depth + 1))
2662 return true;
2663 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2664 Known = Known.anyext(BitWidth);
2665
2666 // Attempt to avoid multi-use ops if we don't need anything from them.
2668 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2669 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2670 break;
2671 }
2672 case ISD::TRUNCATE: {
2673 SDValue Src = Op.getOperand(0);
2674
2675 // Simplify the input, using demanded bit information, and compute the known
2676 // zero/one bits live out.
2677 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2678 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2679 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2680 Depth + 1)) {
2681 // Disable the nsw and nuw flags. We can no longer guarantee that we
2682 // won't wrap after simplification.
2683 Op->dropFlags(SDNodeFlags::NoWrap);
2684 return true;
2685 }
2686 Known = Known.trunc(BitWidth);
2687
2688 // Attempt to avoid multi-use ops if we don't need anything from them.
2690 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2691 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2692
2693 // If the input is only used by this truncate, see if we can shrink it based
2694 // on the known demanded bits.
2695 switch (Src.getOpcode()) {
2696 default:
2697 break;
2698 case ISD::SRL:
2699 // Shrink SRL by a constant if none of the high bits shifted in are
2700 // demanded.
2701 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2702 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2703 // undesirable.
2704 break;
2705
2706 if (Src.getNode()->hasOneUse()) {
2707 if (isTruncateFree(Src, VT) &&
2708 !isTruncateFree(Src.getValueType(), VT)) {
2709 // If truncate is only free at trunc(srl), do not turn it into
2710 // srl(trunc). The check first verifies that the truncate is free
2711 // at Src's opcode (srl), then that the truncate is not done by
2712 // referencing a sub-register. In testing, if both trunc(srl) and
2713 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2714 // trunc(srl)'s trunc is free, trunc(srl) is better.
2715 break;
2716 }
2717
2718 std::optional<unsigned> ShAmtC =
2719 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2720 if (!ShAmtC || *ShAmtC >= BitWidth)
2721 break;
2722 unsigned ShVal = *ShAmtC;
2723
2724 APInt HighBits =
2725 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2726 HighBits.lshrInPlace(ShVal);
2727 HighBits = HighBits.trunc(BitWidth);
2728 if (!(HighBits & DemandedBits)) {
2729 // None of the shifted in bits are needed. Add a truncate of the
2730 // shift input, then shift it.
2731 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2732 SDValue NewTrunc =
2733 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2734 return TLO.CombineTo(
2735 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2736 }
2737 }
2738 break;
2739 }
2740
2741 break;
2742 }
2743 case ISD::AssertZext: {
2744 // AssertZext demands all of the high bits, plus any of the low bits
2745 // demanded by its users.
2746 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2748 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2749 TLO, Depth + 1))
2750 return true;
2751
2752 Known.Zero |= ~InMask;
2753 Known.One &= (~Known.Zero);
2754 break;
2755 }
2757 SDValue Src = Op.getOperand(0);
2758 SDValue Idx = Op.getOperand(1);
2759 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2760 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2761
2762 if (SrcEltCnt.isScalable())
2763 return false;
2764
2765 // Demand the bits from every vector element without a constant index.
2766 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2767 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2768 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2769 if (CIdx->getAPIntValue().ult(NumSrcElts))
2770 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2771
2772 // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2773 // anything about the extended bits.
2774 APInt DemandedSrcBits = DemandedBits;
2775 if (BitWidth > EltBitWidth)
2776 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2777
2778 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2779 Depth + 1))
2780 return true;
2781
2782 // Attempt to avoid multi-use ops if we don't need anything from them.
2783 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2784 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2785 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2786 SDValue NewOp =
2787 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2788 return TLO.CombineTo(Op, NewOp);
2789 }
2790 }
2791
2792 Known = Known2;
2793 if (BitWidth > EltBitWidth)
2794 Known = Known.anyext(BitWidth);
2795 break;
2796 }
2797 case ISD::BITCAST: {
2798 if (VT.isScalableVector())
2799 return false;
2800 SDValue Src = Op.getOperand(0);
2801 EVT SrcVT = Src.getValueType();
2802 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2803
2804 // If this is an FP->Int bitcast and if the sign bit is the only
2805 // thing demanded, turn this into a FGETSIGN.
2806 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2807 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2808 SrcVT.isFloatingPoint()) {
2810 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2811 // place. We expect the SHL to be eliminated by other optimizations.
2812 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, VT, Src);
2813 unsigned ShVal = Op.getValueSizeInBits() - 1;
2814 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2815 return TLO.CombineTo(Op,
2816 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2817 }
2818 }
2819
2820 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2821 // Demand the elt/bit if any of the original elts/bits are demanded.
2822 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2823 unsigned Scale = BitWidth / NumSrcEltBits;
2824 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2825 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2826 for (unsigned i = 0; i != Scale; ++i) {
2827 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2828 unsigned BitOffset = EltOffset * NumSrcEltBits;
2829 DemandedSrcBits |= DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2830 }
2831 // Recursive calls below may turn not demanded elements into poison, so we
2832 // need to demand all smaller source elements that maps to a demanded
2833 // destination element.
2834 APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2835
2836 APInt KnownSrcUndef, KnownSrcZero;
2837 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2838 KnownSrcZero, TLO, Depth + 1))
2839 return true;
2840
2841 KnownBits KnownSrcBits;
2842 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2843 KnownSrcBits, TLO, Depth + 1))
2844 return true;
2845 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2846 // TODO - bigendian once we have test coverage.
2847 unsigned Scale = NumSrcEltBits / BitWidth;
2848 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2849 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2850 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2851 for (unsigned i = 0; i != NumElts; ++i)
2852 if (DemandedElts[i]) {
2853 unsigned Offset = (i % Scale) * BitWidth;
2854 DemandedSrcBits.insertBits(DemandedBits, Offset);
2855 DemandedSrcElts.setBit(i / Scale);
2856 }
2857
2858 if (SrcVT.isVector()) {
2859 APInt KnownSrcUndef, KnownSrcZero;
2860 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2861 KnownSrcZero, TLO, Depth + 1))
2862 return true;
2863 }
2864
2865 KnownBits KnownSrcBits;
2866 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2867 KnownSrcBits, TLO, Depth + 1))
2868 return true;
2869
2870 // Attempt to avoid multi-use ops if we don't need anything from them.
2871 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2872 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2873 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2874 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2875 return TLO.CombineTo(Op, NewOp);
2876 }
2877 }
2878 }
2879
2880 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2881 // recursive call where Known may be useful to the caller.
2882 if (Depth > 0) {
2883 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2884 return false;
2885 }
2886 break;
2887 }
2888 case ISD::MUL:
2889 if (DemandedBits.isPowerOf2()) {
2890 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2891 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2892 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2893 unsigned CTZ = DemandedBits.countr_zero();
2894 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2895 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2896 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2897 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2898 return TLO.CombineTo(Op, Shl);
2899 }
2900 }
2901 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2902 // X * X is odd iff X is odd.
2903 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2904 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2905 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2906 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2907 return TLO.CombineTo(Op, And1);
2908 }
2909 [[fallthrough]];
2910 case ISD::PTRADD:
2911 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
2912 break;
2913 // PTRADD behaves like ADD if pointers are represented as integers.
2914 [[fallthrough]];
2915 case ISD::ADD:
2916 case ISD::SUB: {
2917 // Add, Sub, and Mul don't demand any bits in positions beyond that
2918 // of the highest bit demanded of them.
2919 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2920 SDNodeFlags Flags = Op.getNode()->getFlags();
2921 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2922 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2923 KnownBits KnownOp0, KnownOp1;
2924 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2925 const KnownBits &KnownRHS) {
2926 if (Op.getOpcode() == ISD::MUL)
2927 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2928 return Demanded;
2929 };
2930 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2931 Depth + 1) ||
2932 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2933 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2934 // See if the operation should be performed at a smaller bit width.
2936 // Disable the nsw and nuw flags. We can no longer guarantee that we
2937 // won't wrap after simplification.
2938 Op->dropFlags(SDNodeFlags::NoWrap);
2939 return true;
2940 }
2941
2942 // neg x with only low bit demanded is simply x.
2943 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2944 isNullConstant(Op0))
2945 return TLO.CombineTo(Op, Op1);
2946
2947 // Attempt to avoid multi-use ops if we don't need anything from them.
2948 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2950 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2952 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2953 if (DemandedOp0 || DemandedOp1) {
2954 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2955 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2956 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
2957 Flags & ~SDNodeFlags::NoWrap);
2958 return TLO.CombineTo(Op, NewOp);
2959 }
2960 }
2961
2962 // If we have a constant operand, we may be able to turn it into -1 if we
2963 // do not demand the high bits. This can make the constant smaller to
2964 // encode, allow more general folding, or match specialized instruction
2965 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2966 // is probably not useful (and could be detrimental).
2968 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2969 if (C && !C->isAllOnes() && !C->isOne() &&
2970 (C->getAPIntValue() | HighMask).isAllOnes()) {
2971 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2972 // Disable the nsw and nuw flags. We can no longer guarantee that we
2973 // won't wrap after simplification.
2974 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1,
2975 Flags & ~SDNodeFlags::NoWrap);
2976 return TLO.CombineTo(Op, NewOp);
2977 }
2978
2979 // Match a multiply with a disguised negated-power-of-2 and convert it to
2980 // an equivalent shift-left amount.
2981 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2982 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2983 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2984 return 0;
2985
2986 // Don't touch opaque constants. Also, ignore zero and power-of-2
2987 // multiplies. Those will get folded later.
2988 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2989 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2990 !MulC->getAPIntValue().isPowerOf2()) {
2991 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2992 if (UnmaskedC.isNegatedPowerOf2())
2993 return (-UnmaskedC).logBase2();
2994 }
2995 return 0;
2996 };
2997
2998 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2999 unsigned ShlAmt) {
3000 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
3001 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
3002 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
3003 return TLO.CombineTo(Op, Res);
3004 };
3005
3007 if (Op.getOpcode() == ISD::ADD) {
3008 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
3009 if (unsigned ShAmt = getShiftLeftAmt(Op0))
3010 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
3011 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
3012 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3013 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
3014 }
3015 if (Op.getOpcode() == ISD::SUB) {
3016 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
3017 if (unsigned ShAmt = getShiftLeftAmt(Op1))
3018 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
3019 }
3020 }
3021
3022 if (Op.getOpcode() == ISD::MUL) {
3023 Known = KnownBits::mul(KnownOp0, KnownOp1);
3024 } else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB.
3026 Op.getOpcode() != ISD::SUB, Flags.hasNoSignedWrap(),
3027 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
3028 }
3029 break;
3030 }
3031 case ISD::FABS: {
3032 SDValue Op0 = Op.getOperand(0);
3033 APInt SignMask = APInt::getSignMask(BitWidth);
3034
3035 if (!DemandedBits.intersects(SignMask))
3036 return TLO.CombineTo(Op, Op0);
3037
3038 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3039 Depth + 1))
3040 return true;
3041
3042 if (Known.isNonNegative())
3043 return TLO.CombineTo(Op, Op0);
3044 if (Known.isNegative())
3045 return TLO.CombineTo(
3046 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
3047
3048 Known.Zero |= SignMask;
3049 Known.One &= ~SignMask;
3050
3051 break;
3052 }
3053 case ISD::FCOPYSIGN: {
3054 SDValue Op0 = Op.getOperand(0);
3055 SDValue Op1 = Op.getOperand(1);
3056
3057 unsigned BitWidth0 = Op0.getScalarValueSizeInBits();
3058 unsigned BitWidth1 = Op1.getScalarValueSizeInBits();
3059 APInt SignMask0 = APInt::getSignMask(BitWidth0);
3060 APInt SignMask1 = APInt::getSignMask(BitWidth1);
3061
3062 if (!DemandedBits.intersects(SignMask0))
3063 return TLO.CombineTo(Op, Op0);
3064
3065 if (SimplifyDemandedBits(Op0, ~SignMask0 & DemandedBits, DemandedElts,
3066 Known, TLO, Depth + 1) ||
3067 SimplifyDemandedBits(Op1, SignMask1, DemandedElts, Known2, TLO,
3068 Depth + 1))
3069 return true;
3070
3071 if (Known2.isNonNegative())
3072 return TLO.CombineTo(
3073 Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
3074
3075 if (Known2.isNegative())
3076 return TLO.CombineTo(
3077 Op, TLO.DAG.getNode(ISD::FNEG, dl, VT,
3078 TLO.DAG.getNode(ISD::FABS, SDLoc(Op0), VT, Op0)));
3079
3080 Known.Zero &= ~SignMask0;
3081 Known.One &= ~SignMask0;
3082 break;
3083 }
3084 case ISD::FNEG: {
3085 SDValue Op0 = Op.getOperand(0);
3086 APInt SignMask = APInt::getSignMask(BitWidth);
3087
3088 if (!DemandedBits.intersects(SignMask))
3089 return TLO.CombineTo(Op, Op0);
3090
3091 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known, TLO,
3092 Depth + 1))
3093 return true;
3094
3095 if (!Known.isSignUnknown()) {
3096 Known.Zero ^= SignMask;
3097 Known.One ^= SignMask;
3098 }
3099
3100 break;
3101 }
3102 default:
3103 // We also ask the target about intrinsics (which could be specific to it).
3104 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3105 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3106 // TODO: Probably okay to remove after audit; here to reduce change size
3107 // in initial enablement patch for scalable vectors
3108 if (Op.getValueType().isScalableVector())
3109 break;
3111 Known, TLO, Depth))
3112 return true;
3113 break;
3114 }
3115
3116 // Just use computeKnownBits to compute output bits.
3117 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
3118 break;
3119 }
3120
3121 // If we know the value of all of the demanded bits, return this as a
3122 // constant.
3124 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
3125 // Avoid folding to a constant if any OpaqueConstant is involved.
3126 if (llvm::any_of(Op->ops(), [](SDValue V) {
3127 auto *C = dyn_cast<ConstantSDNode>(V);
3128 return C && C->isOpaque();
3129 }))
3130 return false;
3131 if (VT.isInteger())
3132 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
3133 if (VT.isFloatingPoint())
3134 return TLO.CombineTo(
3135 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
3136 dl, VT));
3137 }
3138
3139 // A multi use 'all demanded elts' simplify failed to find any knownbits.
3140 // Try again just for the original demanded elts.
3141 // Ensure we do this AFTER constant folding above.
3142 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
3143 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
3144
3145 return false;
3146}
3147
// DAG-combiner convenience wrapper: runs the SimplifyDemandedVectorElts call
// below on Op and, if it reports a change, hands the result back to the
// combiner.
// NOTE(review): the opening line of this signature (return type, function
// name and the `Op` parameter) is missing from this listing; the body uses
// `Op` as the value being simplified and returns the bool `Simplified`.
//   DemandedElts - element mask forwarded unchanged to the worker call.
//   DCI          - combiner state; supplies the DAG, the legalization phase
//                  queries and the worklist/commit hooks used below.
3149 const APInt &DemandedElts,
3150 DAGCombinerInfo &DCI) const {
3151 SelectionDAG &DAG = DCI.DAG;
 // The two flags are the negations of DCI.isBeforeLegalize() and
 // DCI.isBeforeLegalizeOps(); presumably they back TLO.LegalTypes() and
 // TLO.LegalOperations() -- confirm against TargetLoweringOpt's constructor.
3152 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
3153 !DCI.isBeforeLegalizeOps());
3154
 // KnownUndef/KnownZero are outputs of the worker call; this wrapper does not
 // read them afterwards.
3155 APInt KnownUndef, KnownZero;
3156 bool Simplified =
3157 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
 // Only touch the combiner state when the worker reported a change: queue
 // Op's node on the worklist and commit what TLO accumulated.
3158 if (Simplified) {
3159 DCI.AddToWorklist(Op.getNode());
3160 DCI.CommitTargetLoweringOpt(TLO);
3161 }
3162
3163 return Simplified;
3164}
3165
3166/// Given a vector binary operation and known undefined elements for each input
3167/// operand, compute whether each element of the output is undefined.
/// The result is an APInt with one bit per element (NumElts bits); bit i is
/// set only when applying the binop's opcode to the scalar stand-ins for
/// element i of both operands produces an undef node.
/// NOTE(review): the line carrying the return type, function name and the
/// leading parameters is missing from this listing; the body refers to them
/// as `BO` (the binop value) and `DAG`.
3169 const APInt &UndefOp0,
3170 const APInt &UndefOp1) {
3171 EVT VT = BO.getValueType();
 // NOTE(review): the next line looks like the tail of an assert whose first
 // line (the condition) is missing from this listing.
3173 "Vector binop only");
3174
3175 EVT EltVT = VT.getVectorElementType();
 // Fixed-length vectors get one mask bit per element; any other VT is treated
 // as a single element.
3176 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
 // Both incoming undef masks must be exactly NumElts bits wide.
3177 assert(UndefOp0.getBitWidth() == NumElts &&
3178 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3179
 // Produce a scalar stand-in for element Index of V:
 //  - an UNDEF of the element type when UndefVals marks that element;
 //  - the BUILD_VECTOR operand itself when it is an FP constant, undef, or a
 //    non-opaque integer constant;
 //  - an empty SDValue otherwise (nothing can be concluded).
3180 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3181 const APInt &UndefVals) {
3182 if (UndefVals[Index])
3183 return DAG.getUNDEF(EltVT);
3184
3185 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3186 // Try hard to make sure that the getNode() call is not creating temporary
3187 // nodes. Ignore opaque integers because they do not constant fold.
3188 SDValue Elt = BV->getOperand(Index);
3189 auto *C = dyn_cast<ConstantSDNode>(Elt);
3190 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3191 return Elt;
3192 }
3193
3194 return SDValue();
3195 };
3196
 // Start with no element known undef and set bits one element at a time.
3197 APInt KnownUndef = APInt::getZero(NumElts);
3198 for (unsigned i = 0; i != NumElts; ++i) {
3199 // If both inputs for this element are either constant or undef and match
3200 // the element type, compute the constant/undef result for this element of
3201 // the vector.
3202 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3203 // not handle FP constants. The code within getNode() should be refactored
3204 // to avoid the danger of creating a bogus temporary node here.
3205 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3206 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
 // The type check skips stand-ins whose scalar type differs from EltVT.
3207 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3208 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3209 KnownUndef.setBit(i);
3210 }
3211 return KnownUndef;
3212}
3213
3215 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3216 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3217 bool AssumeSingleUse) const {
3218 EVT VT = Op.getValueType();
3219 unsigned Opcode = Op.getOpcode();
3220 APInt DemandedElts = OriginalDemandedElts;
3221 unsigned NumElts = DemandedElts.getBitWidth();
3222 assert(VT.isVector() && "Expected vector op");
3223
3224 KnownUndef = KnownZero = APInt::getZero(NumElts);
3225
3227 return false;
3228
3229 // TODO: For now we assume we know nothing about scalable vectors.
3230 if (VT.isScalableVector())
3231 return false;
3232
3233 assert(VT.getVectorNumElements() == NumElts &&
3234 "Mask size mismatches value type element count!");
3235
3236 // Undef operand.
3237 if (Op.isUndef()) {
3238 KnownUndef.setAllBits();
3239 return false;
3240 }
3241
3242 // If Op has other users, assume that all elements are needed.
3243 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3244 DemandedElts.setAllBits();
3245
3246 // Not demanding any elements from Op.
3247 if (DemandedElts == 0) {
3248 KnownUndef.setAllBits();
3249 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3250 }
3251
3252 // Limit search depth.
3254 return false;
3255
3256 SDLoc DL(Op);
3257 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3258 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3259
3260 // Helper for demanding the specified elements and all the bits of both binary
3261 // operands.
3262 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3263 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3264 TLO.DAG, Depth + 1);
3265 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3266 TLO.DAG, Depth + 1);
3267 if (NewOp0 || NewOp1) {
3268 SDValue NewOp =
3269 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3270 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3271 return TLO.CombineTo(Op, NewOp);
3272 }
3273 return false;
3274 };
3275
3276 switch (Opcode) {
3277 case ISD::SCALAR_TO_VECTOR: {
3278 if (!DemandedElts[0]) {
3279 KnownUndef.setAllBits();
3280 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3281 }
3282 KnownUndef.setHighBits(NumElts - 1);
3283 break;
3284 }
3285 case ISD::BITCAST: {
3286 SDValue Src = Op.getOperand(0);
3287 EVT SrcVT = Src.getValueType();
3288
3289 if (!SrcVT.isVector()) {
3290 // TODO - bigendian once we have test coverage.
3291 if (IsLE) {
3292 APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3293 unsigned EltSize = VT.getScalarSizeInBits();
3294 for (unsigned I = 0; I != NumElts; ++I) {
3295 if (DemandedElts[I]) {
3296 unsigned Offset = I * EltSize;
3297 DemandedSrcBits.setBits(Offset, Offset + EltSize);
3298 }
3299 }
3300 KnownBits Known;
3301 if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3302 return true;
3303 }
3304 break;
3305 }
3306
3307 // Fast handling of 'identity' bitcasts.
3308 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3309 if (NumSrcElts == NumElts)
3310 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3311 KnownZero, TLO, Depth + 1);
3312
3313 APInt SrcDemandedElts, SrcZero, SrcUndef;
3314
3315 // Bitcast from 'large element' src vector to 'small element' vector, we
3316 // must demand a source element if any DemandedElt maps to it.
3317 if ((NumElts % NumSrcElts) == 0) {
3318 unsigned Scale = NumElts / NumSrcElts;
3319 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3320 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3321 TLO, Depth + 1))
3322 return true;
3323
3324 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3325 // of the large element.
3326 // TODO - bigendian once we have test coverage.
3327 if (IsLE) {
3328 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3329 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3330 for (unsigned i = 0; i != NumElts; ++i)
3331 if (DemandedElts[i]) {
3332 unsigned Ofs = (i % Scale) * EltSizeInBits;
3333 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3334 }
3335
3336 KnownBits Known;
3337 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3338 TLO, Depth + 1))
3339 return true;
3340
3341 // The bitcast has split each wide element into a number of
3342 // narrow subelements. We have just computed the Known bits
3343 // for wide elements. See if element splitting results in
3344 // some subelements being zero. Only for demanded elements!
3345 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3346 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3347 .isAllOnes())
3348 continue;
3349 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3350 unsigned Elt = Scale * SrcElt + SubElt;
3351 if (DemandedElts[Elt])
3352 KnownZero.setBit(Elt);
3353 }
3354 }
3355 }
3356
3357 // If the src element is zero/undef then all the output elements will be -
3358 // only demanded elements are guaranteed to be correct.
3359 for (unsigned i = 0; i != NumSrcElts; ++i) {
3360 if (SrcDemandedElts[i]) {
3361 if (SrcZero[i])
3362 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3363 if (SrcUndef[i])
3364 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3365 }
3366 }
3367 }
3368
3369 // Bitcast from 'small element' src vector to 'large element' vector, we
3370 // demand all smaller source elements covered by the larger demanded element
3371 // of this vector.
3372 if ((NumSrcElts % NumElts) == 0) {
3373 unsigned Scale = NumSrcElts / NumElts;
3374 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3375 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3376 TLO, Depth + 1))
3377 return true;
3378
3379 // If all the src elements covering an output element are zero/undef, then
3380 // the output element will be as well, assuming it was demanded.
3381 for (unsigned i = 0; i != NumElts; ++i) {
3382 if (DemandedElts[i]) {
3383 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3384 KnownZero.setBit(i);
3385 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3386 KnownUndef.setBit(i);
3387 }
3388 }
3389 }
3390 break;
3391 }
3392 case ISD::FREEZE: {
3393 SDValue N0 = Op.getOperand(0);
3395 N0, DemandedElts, UndefPoisonKind::UndefOrPoison, Depth + 1))
3396 return TLO.CombineTo(Op, N0);
3397
3398 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3399 // freeze(op(x, ...)) -> op(freeze(x), ...).
3400 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3401 return TLO.CombineTo(
3403 TLO.DAG.getFreeze(N0.getOperand(0))));
3404 break;
3405 }
3406 case ISD::BUILD_VECTOR: {
3407 // Check all elements and simplify any unused elements with UNDEF.
3408 if (!DemandedElts.isAllOnes()) {
3409 // Don't simplify BROADCASTS.
3410 if (llvm::any_of(Op->op_values(),
3411 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3413 bool Updated = false;
3414 for (unsigned i = 0; i != NumElts; ++i) {
3415 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3416 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3417 KnownUndef.setBit(i);
3418 Updated = true;
3419 }
3420 }
3421 if (Updated)
3422 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3423 }
3424 }
3425 for (unsigned i = 0; i != NumElts; ++i) {
3426 SDValue SrcOp = Op.getOperand(i);
3427 if (SrcOp.isUndef()) {
3428 KnownUndef.setBit(i);
3429 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3431 KnownZero.setBit(i);
3432 }
3433 }
3434 break;
3435 }
3436 case ISD::CONCAT_VECTORS: {
3437 EVT SubVT = Op.getOperand(0).getValueType();
3438 unsigned NumSubVecs = Op.getNumOperands();
3439 unsigned NumSubElts = SubVT.getVectorNumElements();
3440 for (unsigned i = 0; i != NumSubVecs; ++i) {
3441 SDValue SubOp = Op.getOperand(i);
3442 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3443 APInt SubUndef, SubZero;
3444 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3445 Depth + 1))
3446 return true;
3447 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3448 KnownZero.insertBits(SubZero, i * NumSubElts);
3449 }
3450
3451 // Attempt to avoid multi-use ops if we don't need anything from them.
3452 if (!DemandedElts.isAllOnes()) {
3453 bool FoundNewSub = false;
3454 SmallVector<SDValue, 2> DemandedSubOps;
3455 for (unsigned i = 0; i != NumSubVecs; ++i) {
3456 SDValue SubOp = Op.getOperand(i);
3457 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3459 SubOp, SubElts, TLO.DAG, Depth + 1);
3460 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3461 FoundNewSub = NewSubOp ? true : FoundNewSub;
3462 }
3463 if (FoundNewSub) {
3464 SDValue NewOp =
3465 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3466 return TLO.CombineTo(Op, NewOp);
3467 }
3468 }
3469 break;
3470 }
3471 case ISD::INSERT_SUBVECTOR: {
3472 // Demand any elements from the subvector and the remainder from the src it
3473 // is inserted into.
3474 SDValue Src = Op.getOperand(0);
3475 SDValue Sub = Op.getOperand(1);
3476 uint64_t Idx = Op.getConstantOperandVal(2);
3477 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3478 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3479 APInt DemandedSrcElts = DemandedElts;
3480 DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
3481
3482 // If none of the sub operand elements are demanded, bypass the insert.
3483 if (!DemandedSubElts)
3484 return TLO.CombineTo(Op, Src);
3485
3486 APInt SubUndef, SubZero;
3487 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3488 Depth + 1))
3489 return true;
3490
3491 // If none of the src operand elements are demanded, replace it with undef.
3492 if (!DemandedSrcElts && !Src.isUndef())
3493 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3494 TLO.DAG.getUNDEF(VT), Sub,
3495 Op.getOperand(2)));
3496
3497 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3498 TLO, Depth + 1))
3499 return true;
3500 KnownUndef.insertBits(SubUndef, Idx);
3501 KnownZero.insertBits(SubZero, Idx);
3502
3503 // Attempt to avoid multi-use ops if we don't need anything from them.
3504 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3506 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3508 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3509 if (NewSrc || NewSub) {
3510 NewSrc = NewSrc ? NewSrc : Src;
3511 NewSub = NewSub ? NewSub : Sub;
3512 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3513 NewSub, Op.getOperand(2));
3514 return TLO.CombineTo(Op, NewOp);
3515 }
3516 }
3517 break;
3518 }
3520 // Offset the demanded elts by the subvector index.
3521 SDValue Src = Op.getOperand(0);
3522 if (Src.getValueType().isScalableVector())
3523 break;
3524 uint64_t Idx = Op.getConstantOperandVal(1);
3525 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3526 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3527
3528 APInt SrcUndef, SrcZero;
3529 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3530 Depth + 1))
3531 return true;
3532 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3533 KnownZero = SrcZero.extractBits(NumElts, Idx);
3534
3535 // Attempt to avoid multi-use ops if we don't need anything from them.
3536 if (!DemandedElts.isAllOnes()) {
3538 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3539 if (NewSrc) {
3540 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3541 Op.getOperand(1));
3542 return TLO.CombineTo(Op, NewOp);
3543 }
3544 }
3545 break;
3546 }
3548 SDValue Vec = Op.getOperand(0);
3549 SDValue Scl = Op.getOperand(1);
3550 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3551
3552 // For a legal, constant insertion index, if we don't need this insertion
3553 // then strip it, else remove it from the demanded elts.
3554 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3555 unsigned Idx = CIdx->getZExtValue();
3556 if (!DemandedElts[Idx])
3557 return TLO.CombineTo(Op, Vec);
3558
3559 APInt DemandedVecElts(DemandedElts);
3560 DemandedVecElts.clearBit(Idx);
3561 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3562 KnownZero, TLO, Depth + 1))
3563 return true;
3564
3565 KnownUndef.setBitVal(Idx, Scl.isUndef());
3566
3567 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3568 break;
3569 }
3570
3571 APInt VecUndef, VecZero;
3572 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3573 Depth + 1))
3574 return true;
3575 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3576 break;
3577 }
3578 case ISD::VSELECT: {
3579 SDValue Sel = Op.getOperand(0);
3580 SDValue LHS = Op.getOperand(1);
3581 SDValue RHS = Op.getOperand(2);
3582
3583 // Try to transform the select condition based on the current demanded
3584 // elements.
3585 APInt UndefSel, ZeroSel;
3586 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3587 Depth + 1))
3588 return true;
3589
3590 // See if we can simplify either vselect operand.
3591 APInt DemandedLHS(DemandedElts);
3592 APInt DemandedRHS(DemandedElts);
3593 APInt UndefLHS, ZeroLHS;
3594 APInt UndefRHS, ZeroRHS;
3595 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3596 Depth + 1))
3597 return true;
3598 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3599 Depth + 1))
3600 return true;
3601
3602 KnownUndef = UndefLHS & UndefRHS;
3603 KnownZero = ZeroLHS & ZeroRHS;
3604
3605 // If we know that the selected element is always zero, we don't need the
3606 // select value element.
3607 APInt DemandedSel = DemandedElts & ~KnownZero;
3608 if (DemandedSel != DemandedElts)
3609 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3610 Depth + 1))
3611 return true;
3612
3613 break;
3614 }
3615 case ISD::VECTOR_SHUFFLE: {
3616 SDValue LHS = Op.getOperand(0);
3617 SDValue RHS = Op.getOperand(1);
3618 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3619
3620 // Collect demanded elements from shuffle operands..
3621 APInt DemandedLHS(NumElts, 0);
3622 APInt DemandedRHS(NumElts, 0);
3623 for (unsigned i = 0; i != NumElts; ++i) {
3624 int M = ShuffleMask[i];
3625 if (M < 0 || !DemandedElts[i])
3626 continue;
3627 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3628 if (M < (int)NumElts)
3629 DemandedLHS.setBit(M);
3630 else
3631 DemandedRHS.setBit(M - NumElts);
3632 }
3633
3634 // If either side isn't demanded, replace it by UNDEF. We handle this
3635 // explicitly here to also simplify in case of multiple uses (on the
3636 // contrary to the SimplifyDemandedVectorElts calls below).
3637 bool FoldLHS = !DemandedLHS && !LHS.isUndef();
3638 bool FoldRHS = !DemandedRHS && !RHS.isUndef();
3639 if (FoldLHS || FoldRHS) {
3640 LHS = FoldLHS ? TLO.DAG.getUNDEF(LHS.getValueType()) : LHS;
3641 RHS = FoldRHS ? TLO.DAG.getUNDEF(RHS.getValueType()) : RHS;
3642 SDValue NewOp =
3643 TLO.DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, ShuffleMask);
3644 return TLO.CombineTo(Op, NewOp);
3645 }
3646
3647 // See if we can simplify either shuffle operand.
3648 APInt UndefLHS, ZeroLHS;
3649 APInt UndefRHS, ZeroRHS;
3650 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3651 Depth + 1))
3652 return true;
3653 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3654 Depth + 1))
3655 return true;
3656
3657 // Simplify mask using undef elements from LHS/RHS.
3658 bool Updated = false;
3659 bool IdentityLHS = true, IdentityRHS = true;
3660 SmallVector<int, 32> NewMask(ShuffleMask);
3661 for (unsigned i = 0; i != NumElts; ++i) {
3662 int &M = NewMask[i];
3663 if (M < 0)
3664 continue;
3665 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3666 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3667 Updated = true;
3668 M = -1;
3669 }
3670 IdentityLHS &= (M < 0) || (M == (int)i);
3671 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3672 }
3673
3674 // Update legal shuffle masks based on demanded elements if it won't reduce
3675 // to Identity which can cause premature removal of the shuffle mask.
3676 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3677 SDValue LegalShuffle =
3678 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3679 if (LegalShuffle)
3680 return TLO.CombineTo(Op, LegalShuffle);
3681 }
3682
3683 // Propagate undef/zero elements from LHS/RHS.
3684 for (unsigned i = 0; i != NumElts; ++i) {
3685 int M = ShuffleMask[i];
3686 if (M < 0) {
3687 KnownUndef.setBit(i);
3688 } else if (M < (int)NumElts) {
3689 if (UndefLHS[M])
3690 KnownUndef.setBit(i);
3691 if (ZeroLHS[M])
3692 KnownZero.setBit(i);
3693 } else {
3694 if (UndefRHS[M - NumElts])
3695 KnownUndef.setBit(i);
3696 if (ZeroRHS[M - NumElts])
3697 KnownZero.setBit(i);
3698 }
3699 }
3700 break;
3701 }
3705 APInt SrcUndef, SrcZero;
3706 SDValue Src = Op.getOperand(0);
3707 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3708 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3709 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3710 Depth + 1))
3711 return true;
3712 KnownZero = SrcZero.zextOrTrunc(NumElts);
3713 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3714
3715 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3716 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3717 DemandedSrcElts == 1) {
3718 // aext - if we just need the bottom element then we can bitcast.
3719 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3720 }
3721
3722 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3723 // zext(undef) upper bits are guaranteed to be zero.
3724 if (DemandedElts.isSubsetOf(KnownUndef))
3725 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3726 KnownUndef.clearAllBits();
3727
3728 // zext - if we just need the bottom element then we can mask:
3729 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3730 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3731 Op->isOnlyUserOf(Src.getNode()) &&
3732 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3733 SDLoc DL(Op);
3734 EVT SrcVT = Src.getValueType();
3735 EVT SrcSVT = SrcVT.getScalarType();
3736
3737 // If we're after type legalization and SrcSVT is not legal, use the
3738 // promoted type for creating constants to avoid creating nodes with
3739 // illegal types.
3740 if (TLO.LegalTypes())
3741 SrcSVT = getLegalTypeToTransformTo(*TLO.DAG.getContext(), SrcSVT);
3742
3743 SmallVector<SDValue> MaskElts;
3744 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3745 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3746 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3747 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3748 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3749 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3750 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3751 }
3752 }
3753 }
3754 break;
3755 }
3756
3757 // TODO: There are more binop opcodes that could be handled here - MIN,
3758 // MAX, saturated math, etc.
3759 case ISD::ADD: {
3760 SDValue Op0 = Op.getOperand(0);
3761 SDValue Op1 = Op.getOperand(1);
3762 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3763 APInt UndefLHS, ZeroLHS;
3764 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3765 Depth + 1, /*AssumeSingleUse*/ true))
3766 return true;
3767 }
3768 [[fallthrough]];
3769 }
3770 case ISD::AVGCEILS:
3771 case ISD::AVGCEILU:
3772 case ISD::AVGFLOORS:
3773 case ISD::AVGFLOORU:
3774 case ISD::OR:
3775 case ISD::XOR:
3776 case ISD::SUB:
3777 case ISD::FADD:
3778 case ISD::FSUB:
3779 case ISD::FMUL:
3780 case ISD::FDIV:
3781 case ISD::FREM: {
3782 SDValue Op0 = Op.getOperand(0);
3783 SDValue Op1 = Op.getOperand(1);
3784
3785 APInt UndefRHS, ZeroRHS;
3786 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3787 Depth + 1))
3788 return true;
3789 APInt UndefLHS, ZeroLHS;
3790 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3791 Depth + 1))
3792 return true;
3793
3794 KnownZero = ZeroLHS & ZeroRHS;
3795 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3796
3797 // Attempt to avoid multi-use ops if we don't need anything from them.
3798 // TODO - use KnownUndef to relax the demandedelts?
3799 if (!DemandedElts.isAllOnes())
3800 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3801 return true;
3802 break;
3803 }
3804 case ISD::SHL:
3805 case ISD::SRL:
3806 case ISD::SRA:
3807 case ISD::ROTL:
3808 case ISD::ROTR: {
3809 SDValue Op0 = Op.getOperand(0);
3810 SDValue Op1 = Op.getOperand(1);
3811
3812 APInt UndefRHS, ZeroRHS;
3813 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3814 Depth + 1))
3815 return true;
3816 APInt UndefLHS, ZeroLHS;
3817 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3818 Depth + 1))
3819 return true;
3820
3821 KnownZero = ZeroLHS;
3822 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3823
3824 // Attempt to avoid multi-use ops if we don't need anything from them.
3825 // TODO - use KnownUndef to relax the demandedelts?
3826 if (!DemandedElts.isAllOnes())
3827 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3828 return true;
3829 break;
3830 }
3831 case ISD::MUL:
3832 case ISD::MULHU:
3833 case ISD::MULHS:
3834 case ISD::AND: {
3835 SDValue Op0 = Op.getOperand(0);
3836 SDValue Op1 = Op.getOperand(1);
3837
3838 APInt SrcUndef, SrcZero;
3839 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3840 Depth + 1))
3841 return true;
3842 // FIXME: If we know that a demanded element was zero in Op1 we don't need
3843 // to demand it in Op0 - it's guaranteed to be zero. There is however a
3844 // restriction, as we must not make any of the originally demanded elements
3845 // more poisonous. We could reduce the number of elements demanded, but then
3846 // we would also need to inform SimplifyDemandedVectorElts that some elements
3847 // must not be made more poisonous.
3848 if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
3849 TLO, Depth + 1))
3850 return true;
3851
3852 KnownUndef &= DemandedElts;
3853 KnownZero &= DemandedElts;
3854
3855 // If every element pair has a zero/undef/poison then just fold to zero.
3856 // fold (and x, undef/poison) -> 0 / (and x, 0) -> 0
3857 // fold (mul x, undef/poison) -> 0 / (mul x, 0) -> 0
3858 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3859 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3860
3861 // If either side has a zero element, then the result element is zero, even
3862 // if the other is an UNDEF.
3863 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3864 // and then handle 'and' nodes with the rest of the binop opcodes.
3865 KnownZero |= SrcZero;
3866 KnownUndef &= SrcUndef;
3867 KnownUndef &= ~KnownZero;
3868
3869 // Attempt to avoid multi-use ops if we don't need anything from them.
3870 if (!DemandedElts.isAllOnes())
3871 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3872 return true;
3873 break;
3874 }
3875 case ISD::TRUNCATE:
3876 case ISD::SIGN_EXTEND:
3877 case ISD::ZERO_EXTEND:
3878 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3879 KnownZero, TLO, Depth + 1))
3880 return true;
3881
3882 if (!DemandedElts.isAllOnes())
3884 Op.getOperand(0), DemandedElts, TLO.DAG, Depth + 1))
3885 return TLO.CombineTo(Op, TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp));
3886
3887 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3888 // zext(undef) upper bits are guaranteed to be zero.
3889 if (DemandedElts.isSubsetOf(KnownUndef))
3890 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3891 KnownUndef.clearAllBits();
3892 }
3893 break;
3894 case ISD::SINT_TO_FP:
3895 case ISD::UINT_TO_FP:
3896 case ISD::FP_TO_SINT:
3897 case ISD::FP_TO_UINT:
3898 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3899 KnownZero, TLO, Depth + 1))
3900 return true;
3901 // Don't fall through to generic undef -> undef handling.
3902 return false;
3903 default: {
3904 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3905 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3906 KnownZero, TLO, Depth))
3907 return true;
3908 } else {
3909 KnownBits Known;
3910 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3911 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3912 TLO, Depth, AssumeSingleUse))
3913 return true;
3914 }
3915 break;
3916 }
3917 }
3918 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3919
3920 // Constant fold all undef cases.
3921 // TODO: Handle zero cases as well.
3922 if (DemandedElts.isSubsetOf(KnownUndef))
3923 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3924
3925 return false;
3926}
3927
3928/// Determine which of the bits specified in Mask are known to be either zero or
3929/// one and return them in the Known.
3931 KnownBits &Known,
3932 const APInt &DemandedElts,
3933 const SelectionDAG &DAG,
3934 unsigned Depth) const {
3935 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3936 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3937 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3938 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3939 "Should use MaskedValueIsZero if you don't know whether Op"
3940 " is a target node!");
3941 Known.resetAll();
3942}
3943
3946 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3947 unsigned Depth) const {
3948 Known.resetAll();
3949}
3950
3953 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3954 unsigned Depth) const {
3955 Known.resetAll();
3956}
3957
3959 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3960 // The low bits are known zero if the pointer is aligned.
3961 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3962}
3963
3969
3970/// This method can be implemented by targets that want to expose additional
3971/// information about sign bits to the DAG Combiner.
3973 const APInt &,
3974 const SelectionDAG &,
3975 unsigned Depth) const {
3976 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3977 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3978 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3979 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3980 "Should use ComputeNumSignBits if you don't know whether Op"
3981 " is a target node!");
3982 return 1;
3983}
3984
3986 GISelValueTracking &Analysis, Register R, const APInt &DemandedElts,
3987 const MachineRegisterInfo &MRI, unsigned Depth) const {
3988 return 1;
3989}
3990
3992 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3993 TargetLoweringOpt &TLO, unsigned Depth) const {
3994 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3995 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3996 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3997 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3998 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3999 " is a target node!");
4000 return false;
4001}
4002
4004 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4005 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
4006 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4007 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4008 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4009 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4010 "Should use SimplifyDemandedBits if you don't know whether Op"
4011 " is a target node!");
4012 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
4013 return false;
4014}
4015
4017 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
4018 SelectionDAG &DAG, unsigned Depth) const {
4019 assert(
4020 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4021 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4022 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4023 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4024 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
4025 " is a target node!");
4026 return SDValue();
4027}
4028
4029SDValue
4032 SelectionDAG &DAG) const {
4033 bool LegalMask = isShuffleMaskLegal(Mask, VT);
4034 if (!LegalMask) {
4035 std::swap(N0, N1);
4037 LegalMask = isShuffleMaskLegal(Mask, VT);
4038 }
4039
4040 if (!LegalMask)
4041 return SDValue();
4042
4043 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
4044}
4045
4047 return nullptr;
4048}
4049
4051 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4052 UndefPoisonKind Kind, unsigned Depth) const {
4053 assert(
4054 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4055 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4056 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4057 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4058 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
4059 " is a target node!");
4060
4061 // If Op can't create undef/poison and none of its operands are undef/poison
4062 // then Op is never undef/poison.
4063 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, Kind,
4064 /*ConsiderFlags*/ true, Depth) &&
4065 all_of(Op->ops(), [&](SDValue V) {
4066 return DAG.isGuaranteedNotToBeUndefOrPoison(V, Kind, Depth + 1);
4067 });
4068}
4069
4071 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
4072 UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const {
4073 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4074 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4075 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4076 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4077 "Should use canCreateUndefOrPoison if you don't know whether Op"
4078 " is a target node!");
4079 // Be conservative and return true.
4080 return true;
4081}
4082
4084 KnownFPClass &Known,
4085 const APInt &DemandedElts,
4086 const SelectionDAG &DAG,
4087 unsigned Depth) const {
4088 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4089 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4090 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4091 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4092 "Should use computeKnownFPClass if you don't know whether Op"
4093 " is a target node!");
4094}
4095
4097 const APInt &DemandedElts,
4098 const SelectionDAG &DAG,
4099 bool SNaN,
4100 unsigned Depth) const {
4101 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4102 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4103 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4104 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4105 "Should use isKnownNeverNaN if you don't know whether Op"
4106 " is a target node!");
4107 return false;
4108}
4109
4111 const APInt &DemandedElts,
4112 APInt &UndefElts,
4113 const SelectionDAG &DAG,
4114 unsigned Depth) const {
4115 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
4116 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
4117 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
4118 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
4119 "Should use isSplatValue if you don't know whether Op"
4120 " is a target node!");
4121 return false;
4122}
4123
4124// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
4125// work with truncating build vectors and vectors with elements of less than
4126// 8 bits.
4128 if (!N)
4129 return false;
4130
4131 unsigned EltWidth;
4132 APInt CVal;
4133 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
4134 /*AllowTruncation=*/true)) {
4135 CVal = CN->getAPIntValue();
4136 EltWidth = N.getValueType().getScalarSizeInBits();
4137 } else
4138 return false;
4139
4140 // If this is a truncating splat, truncate the splat value.
4141 // Otherwise, we may fail to match the expected values below.
4142 if (EltWidth < CVal.getBitWidth())
4143 CVal = CVal.trunc(EltWidth);
4144
4145 switch (getBooleanContents(N.getValueType())) {
4147 return CVal[0];
4149 return CVal.isOne();
4151 return CVal.isAllOnes();
4152 }
4153
4154 llvm_unreachable("Invalid boolean contents");
4155}
4156
4158 if (!N)
4159 return false;
4160
4162 if (!CN) {
4164 if (!BV)
4165 return false;
4166
4167 // Only interested in constant splats; we don't care about undef
4168 // elements in identifying boolean constants, and getConstantSplatNode
4169 // returns NULL if all ops are undef.
4170 CN = BV->getConstantSplatNode();
4171 if (!CN)
4172 return false;
4173 }
4174
4175 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
4176 return !CN->getAPIntValue()[0];
4177
4178 return CN->isZero();
4179}
4180
4182 bool SExt) const {
4183 if (VT == MVT::i1)
4184 return N->isOne();
4185
4187 switch (Cnt) {
4189 // An extended value of 1 is always true, unless its original type is i1,
4190 // in which case it will be sign extended to -1.
4191 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
4194 return N->isAllOnes() && SExt;
4195 }
4196 llvm_unreachable("Unexpected enumeration.");
4197}
4198
4199/// This helper function of SimplifySetCC tries to optimize the comparison when
4200/// either operand of the SetCC node is a bitwise-and instruction.
///
/// Only integer SETEQ/SETNE comparisons whose (possibly swapped) first operand
/// is an ISD::AND are considered. An empty SDValue is returned whenever none
/// of the folds below applies.
4201SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
4202 ISD::CondCode Cond, const SDLoc &DL,
4203 DAGCombinerInfo &DCI) const {
// Canonicalize: if only the right-hand operand is an 'and', move it into N0
// so that the rest of the function only has to inspect N0.
4204 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
4205 std::swap(N0, N1);
4206
4207 SelectionDAG &DAG = DCI.DAG;
4208 EVT OpVT = N0.getValueType();
// Nothing to do unless N0 is an integer 'and' compared for (in)equality.
4209 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
4210 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4211 return SDValue();
4212
4213 // (X & Y) != 0 --> zextOrTrunc(X & Y)
4214 // iff everything but LSB is known zero:
// NOTE(review): the tail of the condition below is not visible in this
// listing; only the SETNE-against-zero part can be confirmed from here.
4215 if (Cond == ISD::SETNE && isNullConstant(N1) &&
4218 unsigned NumEltBits = OpVT.getScalarSizeInBits();
// Mask covering every bit of an element except bit 0.
4219 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4220 if (DAG.MaskedValueIsZero(N0, UpperBits))
4221 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4222 }
4223
4224 // Try to eliminate a power-of-2 mask constant by converting to a signbit
4225 // test in a narrow type that we can truncate to with no cost. Examples:
4226 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4227 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4228 // TODO: This conservatively checks for type legality on the source and
4229 // destination types. That may inhibit optimizations, but it also
4230 // allows setcc->shift transforms that may be more beneficial.
4231 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4232 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4233 isTypeLegal(OpVT) && N0.hasOneUse()) {
// The narrow type is just wide enough for the mask's set bit to become its
// most significant bit.
4234 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4235 AndC->getAPIntValue().getActiveBits());
4236 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4237 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4238 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
// NOTE(review): the condition-code argument of this getSetCC call is on a
// line that is not visible in this listing.
4239 return DAG.getSetCC(DL, VT, Trunc, Zero,
4241 }
4242 }
4243
4244 // Match these patterns in any of their permutations:
4245 // (X & Y) == Y
4246 // (X & Y) != Y
// Y is the 'and' operand that is identical to N1; X is the other operand.
4247 SDValue X, Y;
4248 if (N0.getOperand(0) == N1) {
4249 X = N0.getOperand(1);
4250 Y = N0.getOperand(0);
4251 } else if (N0.getOperand(1) == N1) {
4252 X = N0.getOperand(0);
4253 Y = N0.getOperand(1);
4254 } else {
4255 return SDValue();
4256 }
4257
4258 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4259 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4260 // it's liable to create an infinite loop.
4261 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4262 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4264 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4265 // Note that where Y is variable and is known to have at most one bit set
4266 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4267 // equivalent when Y == 0.
4268 assert(OpVT.isInteger());
// NOTE(review): lines are missing from this listing around here (second half
// of the condition above, and of the legality check below), so the exact
// guard and any update of Cond cannot be confirmed from this view.
4270 if (DCI.isBeforeLegalizeOps() ||
4272 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4273 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4274 // If the target supports an 'and-not' or 'and-complement' logic operation,
4275 // try to use that to make a comparison operation more efficient.
4276 // But don't do this transform if the mask is a single bit because there are
4277 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4278 // 'rlwinm' on PPC).
4279
4280 // Bail out if the compare operand that we want to turn into a zero is
4281 // already a zero (otherwise, infinite loop).
4282 if (isNullConstant(Y))
4283 return SDValue();
4284
4285 // Transform this into: ~X & Y == 0.
4286 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4287 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4288 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4289 }
4290
4291 return SDValue();
4292}
4293
4294/// This helper function of SimplifySetCC tries to optimize the comparison when
4295/// either operand of the SetCC node is a bitwise-or instruction.
4296/// For now, this just transforms (X | Y) ==/!= Y into X & ~Y ==/!= 0.
4297SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4298 ISD::CondCode Cond, const SDLoc &DL,
4299 DAGCombinerInfo &DCI) const {
4300 if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4301 std::swap(N0, N1);
4302
4303 SelectionDAG &DAG = DCI.DAG;
4304 EVT OpVT = N0.getValueType();
4305 if (!N0.hasOneUse() || !OpVT.isInteger() ||
4306 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
4307 return SDValue();
4308
4309 // (X | Y) == Y
4310 // (X | Y) != Y
4311 SDValue X;
4312 if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(X)) {
4313 // If the target supports an 'and-not' or 'and-complement' logic operation,
4314 // try to use that to make a comparison operation more efficient.
4315
4316 // Bail out if the compare operand that we want to turn into a zero is
4317 // already a zero (otherwise, infinite loop).
4318 if (isNullConstant(N1))
4319 return SDValue();
4320
4321 // Transform this into: X & ~Y ==/!= 0.
4322 SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4323 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4324 return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4325 }
4326
4327 return SDValue();
4328}
4329
4330/// There are multiple IR patterns that could be checking whether certain
4331/// truncation of a signed number would be lossy or not. The pattern which is
4332/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4333/// We are looking for the following pattern: (KeptBits is a constant)
4334/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4335/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4336/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4337/// We will unfold it into the natural trunc+sext pattern:
4338/// ((%x << C) a>> C) dstcond %x
4339/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
///
/// Returns an empty SDValue when the pattern does not match, or when
/// shouldTransformSignedTruncationCheck() rejects the transform for the
/// given type and KeptBits.
4340SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4341 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4342 const SDLoc &DL) const {
4343 // We must be comparing with a constant.
4344 ConstantSDNode *C1;
4345 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4346 return SDValue();
4347
4348 // N0 should be: add %x, (1 << (KeptBits-1))
4349 if (N0->getOpcode() != ISD::ADD)
4350 return SDValue();
4351
4352 // And we must be 'add'ing a constant.
4353 ConstantSDNode *C01;
4354 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4355 return SDValue();
4356
4357 SDValue X = N0->getOperand(0);
4358 EVT XVT = X.getValueType();
4359
4360 // Validate constants ...
4361
// I1 is a mutable copy of the compare constant; it may be adjusted below.
4362 APInt I1 = C1->getAPIntValue();
4363
// Map the unsigned source predicate onto the eq/ne predicate of the unfolded
// form. ULE and UGT are first rewritten as ULT and UGE respectively by
// adding one to the constant. Any other predicate is rejected.
4364 ISD::CondCode NewCond;
4365 if (Cond == ISD::CondCode::SETULT) {
4366 NewCond = ISD::CondCode::SETEQ;
4367 } else if (Cond == ISD::CondCode::SETULE) {
4368 NewCond = ISD::CondCode::SETEQ;
4369 // But need to 'canonicalize' the constant.
4370 I1 += 1;
4371 } else if (Cond == ISD::CondCode::SETUGT) {
4372 NewCond = ISD::CondCode::SETNE;
4373 // But need to 'canonicalize' the constant.
4374 I1 += 1;
4375 } else if (Cond == ISD::CondCode::SETUGE) {
4376 NewCond = ISD::CondCode::SETNE;
4377 } else
4378 return SDValue();
4379
4380 APInt I01 = C01->getAPIntValue();
4381
// The lambda captures I1 and I01 by reference, so calling it again after the
// negate() calls below re-validates the negated values.
4382 auto checkConstants = [&I1, &I01]() -> bool {
4383 // Both of them must be power-of-two, and the constant from setcc is bigger.
4384 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4385 };
4386
4387 if (checkConstants()) {
4388 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4389 } else {
4390 // What if we invert constants? (and the target predicate)
4391 I1.negate();
4392 I01.negate();
4393 assert(XVT.isInteger());
4394 NewCond = getSetCCInverse(NewCond, XVT);
4395 if (!checkConstants())
4396 return SDValue();
4397 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4398 }
4399
4400 // They are power-of-two, so which bit is set?
4401 const unsigned KeptBits = I1.logBase2();
4402 const unsigned KeptBitsMinusOne = I01.logBase2();
4403
4404 // Magic!
// The added constant must be exactly half of the compared constant, i.e.
// (1 << (KeptBits-1)) versus (1 << KeptBits), as in the pattern above.
4405 if (KeptBits != (KeptBitsMinusOne + 1))
4406 return SDValue();
4407 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4408
4409 // We don't want to do this in every single case.
4410 SelectionDAG &DAG = DCI.DAG;
4411 if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4412 return SDValue();
4413
4414 // Unfold into: sext_inreg(%x) cond %x
4415 // Where 'cond' will be either 'eq' or 'ne'.
// NOTE(review): the leading arguments of this getNode call (opcode, location,
// type, operand) are on a line that is not visible in this listing.
4416 SDValue SExtInReg = DAG.getNode(
4418 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4419 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4420}
4421
4422// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
//
// N0 is expected to be a one-use 'and' whose one operand is a one-use logical
// shift of a constant (or constant splat) C by Y. The shift is moved onto the
// other 'and' operand X, using the opposite shift direction, and the result
// is masked with C before being compared against N1C with the original
// predicate. Returns an empty SDValue if the pattern is not matched.
4423SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4424 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4425 DAGCombinerInfo &DCI, const SDLoc &DL) const {
// NOTE(review): the head of the first assert (the condition preceding the
// message string) is not visible in this listing.
4427 "Should be a comparison with 0.");
4428 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4429 "Valid only for [in]equality comparisons.");
4430
// Filled in by Match() below: the opposite shift opcode, the shifted constant
// C and the shift amount Y. X is assigned by the caller of Match().
4431 unsigned NewShiftOpcode;
4432 SDValue X, C, Y;
4433
4434 SelectionDAG &DAG = DCI.DAG;
4435
4436 // Look for '(C l>>/<< Y)'.
// Match() captures X by reference and reads it, so X must already hold the
// non-shift operand of the 'and' when Match() is invoked.
4437 auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4438 // The shift should be one-use.
4439 if (!V.hasOneUse())
4440 return false;
4441 unsigned OldShiftOpcode = V.getOpcode();
// Select the logical shift in the opposite direction.
4442 switch (OldShiftOpcode) {
4443 case ISD::SHL:
4444 NewShiftOpcode = ISD::SRL;
4445 break;
4446 case ISD::SRL:
4447 NewShiftOpcode = ISD::SHL;
4448 break;
4449 default:
4450 return false; // must be a logical shift.
4451 }
4452 // We should be shifting a constant.
4453 // FIXME: best to use isConstantOrConstantVector().
4454 C = V.getOperand(0);
4455 ConstantSDNode *CC =
4456 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4457 if (!CC)
4458 return false;
4459 Y = V.getOperand(1);
4460
// XC is null when X is not a constant or constant splat; it is passed on
// as-is to the call below.
4461 ConstantSDNode *XC =
4462 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
// NOTE(review): the callee of this final call (whose result the lambda
// presumably returns) is on a line that is not visible in this listing.
4464 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4465 };
4466
4467 // LHS of comparison should be an one-use 'and'.
4468 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4469 return SDValue();
4470
4471 X = N0.getOperand(0);
4472 SDValue Mask = N0.getOperand(1);
4473
4474 // 'and' is commutative!
// If operand 1 is not the shift, swap roles and retry with operand 0 as the
// candidate shift; the swap also updates X, which Match() reads.
4475 if (!Match(Mask)) {
4476 std::swap(X, Mask);
4477 if (!Match(Mask))
4478 return SDValue();
4479 }
4480
4481 EVT VT = X.getValueType();
4482
4483 // Produce:
4484 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4485 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4486 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4487 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4488 return T2;
4489}
4490
4491/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4492/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4493/// handle the commuted versions of these patterns.
///
/// VT is the result type given to every setcc created here, N1 is the value
/// the binop is compared against, and Cond must be SETEQ or SETNE (asserted).
/// New nodes are built in DCI.DAG using the location DL. Returns the
/// replacement setcc, or an empty SDValue when N1 is neither operand of N0,
/// or, for the (X - Y) == Y form, when N0 has more than one use or its scalar
/// type is 1 bit wide.
4494SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4495 ISD::CondCode Cond, const SDLoc &DL,
4496 DAGCombinerInfo &DCI) const {
4497 unsigned BOpcode = N0.getOpcode();
4498 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4499 "Unexpected binop");
4500 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4501
4502 // (X + Y) == X --> Y == 0
4503 // (X - Y) == X --> Y == 0
4504 // (X ^ Y) == X --> Y == 0
4505 SelectionDAG &DAG = DCI.DAG;
4506 EVT OpVT = N0.getValueType();
4507 SDValue X = N0.getOperand(0);
4508 SDValue Y = N0.getOperand(1);
4509 if (X == N1)
4510 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4511
     // N1 is not an operand of the binop at all: none of the patterns apply.
4512 if (Y != N1)
4513 return SDValue();
4514
4515 // (X + Y) == Y --> X == 0
4516 // (X ^ Y) == Y --> X == 0
4517 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4518 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4519
     // Only the subtract reaches this point. Besides the i1 restriction below,
     // the fold is also skipped when N0 has uses other than this comparison.
4520 // The shift would not be valid if the operands are boolean (i1).
4521 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4522 return SDValue();
4523
4524 // (X - Y) == Y --> X == Y << 1
4525 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4526 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
     // Unless invoked from the legalizer, add the new shift node to DCI's
     // worklist.
4527 if (!DCI.isCalledByLegalizer())
4528 DCI.AddToWorklist(YShl1.getNode());
4529 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4530}
4531
/// Rewrite a comparison of a population count against the constant C1 into
/// add/and/xor operations on the counted value, so that the ctpop itself is no
/// longer needed. N0 is the left operand of the comparison (a one-use CTPOP,
/// optionally behind a one-use scalar TRUNCATE), Cond is the condition code
/// and VT is the type of the setcc that is produced. Returns an empty SDValue
/// when no rewrite applies or when the target reports the ctpop as cheap.
///
/// NOTE(review): listing lines 4532 (the start of this signature), 4540, 4571
/// and 4599 are absent from this extract. The call site later in the file
/// names this helper simplifySetCCWithCTPOP and passes the TargetLowering
/// object (used below as TLI) and VT ahead of N0 -- confirm against the
/// upstream file.
4533 SDValue N0, const APInt &C1,
4534 ISD::CondCode Cond, const SDLoc &dl,
4535 SelectionDAG &DAG) {
4536 // Look through truncs that don't change the value of a ctpop.
4537 // FIXME: Add vector support? Need to be careful with setcc result type below.
4538 SDValue CTPOP = N0;
     // NOTE(review): the last clause of this condition is on a listing line that
     // is missing here.
4539 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4541 CTPOP = N0.getOperand(0);
4542
4543 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4544 return SDValue();
4545
4546 EVT CTVT = CTPOP.getValueType();
4547 SDValue CTOp = CTPOP.getOperand(0);
4548
4549 // Expand a power-of-2-or-zero comparison based on ctpop:
4550 // (ctpop x) u< 2 -> (x & x-1) == 0
4551 // (ctpop x) u> 1 -> (x & x-1) != 0
4552 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4553 // Keep the CTPOP if it is a cheap vector op.
4554 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4555 return SDValue();
4556
     // Give up when the constant is above the target's limit; SETULT is allowed
     // one more because it needs one pass fewer (see Passes below).
4557 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4558 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4559 return SDValue();
4560 if (C1 == 0 && (Cond == ISD::SETULT))
4561 return SDValue(); // This is handled elsewhere.
4562
     // Number of passes of the loop below: C1 for SETUGT, C1 - 1 for SETULT.
4563 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4564
4565 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4566 SDValue Result = CTOp;
     // Each pass computes Result & (Result + all-ones), i.e. Result & (Result - 1),
     // which clears the lowest set bit of Result.
4567 for (unsigned i = 0; i < Passes; i++) {
4568 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4569 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4570 }
     // NOTE(review): CC is declared on a listing line that is missing here; per
     // the pattern comments above it should be an equality/inequality with zero.
4572 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4573 }
4574
4575 // Expand a power-of-2 comparison based on ctpop
4576 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4577 // Keep the CTPOP if it is cheap.
4578 if (TLI.isCtpopFast(CTVT))
4579 return SDValue();
4580
4581 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4582 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4583 assert(CTVT.isInteger());
     // Add is x + all-ones, i.e. x - 1.
4584 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4585
4586 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4587 // check before emitting a potentially unnecessary op.
4588 if (DAG.isKnownNeverZero(CTOp)) {
4589 // (ctpop x) == 1 --> (x & x-1) == 0
4590 // (ctpop x) != 1 --> (x & x-1) != 0
4591 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4592 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4593 return RHS;
4594 }
4595
4596 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4597 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4598 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
     // NOTE(review): CmpCond is declared on a listing line that is missing here;
     // per the pattern comments above it should be the unsigned >/<= pair.
4600 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4601 }
4602
4603 return SDValue();
4604}
4605
/// Simplify an equality comparison against 0 or -1 (scalar or splat) whose
/// left operand N0 is a rotate, or, for the compare-with-0 case only, a
/// one-use 'or' that has a rotate as one of its operands. The rotate is
/// dropped and its source value is compared instead. Returns an empty SDValue
/// when Cond is not SETEQ/SETNE, N1 is not such a constant, or no pattern
/// matches.
///
/// NOTE(review): the first line of this signature (listing line 4606) is
/// absent from this extract. The call site later in the file names this
/// helper foldSetCCWithRotate and passes VT, N0, N1 ahead of Cond -- confirm
/// against the upstream file.
4607 ISD::CondCode Cond, const SDLoc &dl,
4608 SelectionDAG &DAG) {
4609 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4610 return SDValue();
4611
4612 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4613 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4614 return SDValue();
4615
     // Returns the value being rotated when X is a ROTL/ROTR, otherwise an empty
     // SDValue.
4616 auto getRotateSource = [](SDValue X) {
4617 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4618 return X.getOperand(0);
4619 return SDValue();
4620 };
4621
4622 // Peek through a rotated value compared against 0 or -1:
4623 // (rot X, Y) == 0/-1 --> X == 0/-1
4624 // (rot X, Y) != 0/-1 --> X != 0/-1
4625 if (SDValue R = getRotateSource(N0))
4626 return DAG.getSetCC(dl, VT, R, N1, Cond);
4627
4628 // Peek through an 'or' of a rotated value compared against 0:
4629 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4630 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4631 //
4632 // TODO: Add the 'and' with -1 sibling.
4633 // TODO: Recurse through a series of 'or' ops to find the rotate.
4634 EVT OpVT = N0.getValueType();
4635 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
     // Rotate is the first operand of the 'or'.
4636 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4637 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4638 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4639 }
     // Rotate is the second operand of the 'or'.
4640 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4641 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4642 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4643 }
4644 }
4645
4646 return SDValue();
4647}
4648
/// Simplify an equality comparison against zero (scalar or splat) of a one-use
/// funnel shift (FSHL/FSHR) with a constant shift amount, when one of the two
/// shifted operands is a one-use 'or' that contains the other shifted operand.
/// The funnel shift is replaced by a single shl or srl of the 'or's remaining
/// operand, or'ed with the shared operand. Returns an empty SDValue when the
/// pattern does not match.
///
/// NOTE(review): the first line of this signature (listing line 4649) is
/// absent from this extract. The call site later in the file names this
/// helper foldSetCCWithFunnelShift and passes VT, N0, N1 ahead of Cond --
/// confirm against the upstream file.
4650 ISD::CondCode Cond, const SDLoc &dl,
4651 SelectionDAG &DAG) {
4652 // If we are testing for all-bits-clear, we might be able to do that with
4653 // less shifting since bit-order does not matter.
4654 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4655 return SDValue();
4656
4657 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4658 if (!C1 || !C1->isZero())
4659 return SDValue();
4660
4661 if (!N0.hasOneUse() ||
4662 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4663 return SDValue();
4664
     // The shift amount (operand 2) must be a constant or a constant splat.
4665 unsigned BitWidth = N0.getScalarValueSizeInBits();
4666 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4667 if (!ShAmtC)
4668 return SDValue();
4669
     // Reduce the amount modulo the bit width; a remainder of zero is not handled
     // here.
4670 uint64_t ShAmt = ShAmtC->getAPIntValue().urem(BitWidth);
4671 if (ShAmt == 0)
4672 return SDValue();
4673
4674 // Canonicalize fshr as fshl to reduce pattern-matching.
4675 if (N0.getOpcode() == ISD::FSHR)
4676 ShAmt = BitWidth - ShAmt;
4677
4678 // Match an 'or' with a specific operand 'Other' in either commuted variant.
     // On success X is set to the operand equal to Other and Y to the remaining
     // operand of the 'or'; the 'or' must have a single use.
4679 SDValue X, Y;
4680 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4681 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4682 return false;
4683 if (Or.getOperand(0) == Other) {
4684 X = Or.getOperand(0);
4685 Y = Or.getOperand(1);
4686 return true;
4687 }
4688 if (Or.getOperand(1) == Other) {
4689 X = Or.getOperand(1);
4690 Y = Or.getOperand(0);
4691 return true;
4692 }
4693 return false;
4694 };
4695
4696 EVT OpVT = N0.getValueType();
4697 EVT ShAmtVT = N0.getOperand(2).getValueType();
4698 SDValue F0 = N0.getOperand(0);
4699 SDValue F1 = N0.getOperand(1);
4700 if (matchOr(F0, F1)) {
4701 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4702 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4703 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4704 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4705 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4706 }
4707 if (matchOr(F1, F0)) {
4708 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4709 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4710 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4711 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4712 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4713 }
4714
4715 return SDValue();
4716}
4717
4718/// Try to simplify a setcc built with the specified operands and cc. If it is
4719/// unable to simplify it, return a null SDValue.
4721 ISD::CondCode Cond, bool foldBooleans,
4722 DAGCombinerInfo &DCI,
4723 const SDLoc &dl) const {
4724 SelectionDAG &DAG = DCI.DAG;
4725 const DataLayout &Layout = DAG.getDataLayout();
4726 EVT OpVT = N0.getValueType();
4727 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4728
4729 // Constant fold or commute setcc.
4730 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4731 return Fold;
4732
4733 bool N0ConstOrSplat =
4734 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4735 bool N1ConstOrSplat =
4736 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4737
4738 // Canonicalize toward having the constant on the RHS.
4739 // TODO: Handle non-splat vector constants. All undef causes trouble.
4740 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4741 // infinite loop here when we encounter one.
4743 if (N0ConstOrSplat && !N1ConstOrSplat &&
4744 (DCI.isBeforeLegalizeOps() ||
4745 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4746 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4747
4748 // If we have a subtract with the same 2 non-constant operands as this setcc
4749 // -- but in reverse order -- then try to commute the operands of this setcc
4750 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4751 // instruction on some targets.
4752 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4753 (DCI.isBeforeLegalizeOps() ||
4754 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4755 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4756 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4757 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4758
4759 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4760 return V;
4761
4762 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4763 return V;
4764
4765 if (auto *N1C = isConstOrConstSplat(N1)) {
4766 const APInt &C1 = N1C->getAPIntValue();
4767
4768 // Optimize some CTPOP cases.
4769 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4770 return V;
4771
4772 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4773 // X * Y == 0 --> (X == 0) || (Y == 0)
4774 // X * Y != 0 --> (X != 0) && (Y != 0)
4775 // TODO: This bails out if minsize is set, but if the target doesn't have a
4776 // single instruction multiply for this type, it would likely be
4777 // smaller to decompose.
4778 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4779 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4780 (N0->getFlags().hasNoUnsignedWrap() ||
4781 N0->getFlags().hasNoSignedWrap()) &&
4782 !Attr.hasFnAttr(Attribute::MinSize)) {
4783 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4784 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4785 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4786 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4787 }
4788
4789 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4790 // equality comparison, then we're just comparing whether X itself is
4791 // zero.
4792 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4793 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4795 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4796 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4797 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4798 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4799 // (srl (ctlz x), 5) == 0 -> X != 0
4800 // (srl (ctlz x), 5) != 1 -> X != 0
4801 Cond = ISD::SETNE;
4802 } else {
4803 // (srl (ctlz x), 5) != 0 -> X == 0
4804 // (srl (ctlz x), 5) == 1 -> X == 0
4805 Cond = ISD::SETEQ;
4806 }
4807 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4808 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4809 Cond);
4810 }
4811 }
4812 }
4813 }
4814
4815 // setcc X, 0, setlt --> X (when X is all sign bits)
4816 // setcc X, 0, setne --> X (when X is all sign bits)
4817 //
4818 // When we know that X has 0 or -1 in each element (or scalar), this
4819 // comparison will produce X. This is only true when boolean contents are
4820 // represented via 0s and -1s.
4821 if (VT == OpVT &&
4822 // Check that the result of setcc is 0 and -1.
4824 // Match only for checks X < 0 and X != 0
4825 (Cond == ISD::SETLT || Cond == ISD::SETNE) && isNullOrNullSplat(N1) &&
4826 // The identity holds iff we know all sign bits for all lanes.
4828 return N0;
4829
4830 // FIXME: Support vectors.
4831 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4832 const APInt &C1 = N1C->getAPIntValue();
4833
4834 // (zext x) == C --> x == (trunc C)
4835 // (sext x) == C --> x == (trunc C)
4836 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4837 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4838 unsigned MinBits = N0.getValueSizeInBits();
4839 SDValue PreExt;
4840 bool Signed = false;
4841 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4842 // ZExt
4843 MinBits = N0->getOperand(0).getValueSizeInBits();
4844 PreExt = N0->getOperand(0);
4845 } else if (N0->getOpcode() == ISD::AND) {
4846 // DAGCombine turns costly ZExts into ANDs
4847 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4848 if ((C->getAPIntValue()+1).isPowerOf2()) {
4849 MinBits = C->getAPIntValue().countr_one();
4850 PreExt = N0->getOperand(0);
4851 }
4852 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4853 // SExt
4854 MinBits = N0->getOperand(0).getValueSizeInBits();
4855 PreExt = N0->getOperand(0);
4856 Signed = true;
4857 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4858 // ZEXTLOAD / SEXTLOAD
4859 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4860 MinBits = LN0->getMemoryVT().getSizeInBits();
4861 PreExt = N0;
4862 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4863 Signed = true;
4864 MinBits = LN0->getMemoryVT().getSizeInBits();
4865 PreExt = N0;
4866 }
4867 }
4868
4869 // Figure out how many bits we need to preserve this constant.
4870 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4871
4872 // Make sure we're not losing bits from the constant.
4873 if (MinBits > 0 &&
4874 MinBits < C1.getBitWidth() &&
4875 MinBits >= ReqdBits) {
4876 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4877 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4878 // Will get folded away.
4879 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4880 if (MinBits == 1 && C1 == 1)
4881 // Invert the condition.
4882 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4884 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4885 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4886 }
4887
4888 // If truncating the setcc operands is not desirable, we can still
4889 // simplify the expression in some cases:
4890 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4891 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4892 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4893 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4894 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4895 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4896 SDValue TopSetCC = N0->getOperand(0);
4897 unsigned N0Opc = N0->getOpcode();
4898 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4899 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4900 TopSetCC.getOpcode() == ISD::SETCC &&
4901 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4902 (isConstFalseVal(N1) ||
4903 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4904
4905 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4906 (!N1C->isZero() && Cond == ISD::SETNE);
4907
4908 if (!Inverse)
4909 return TopSetCC;
4910
4912 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4913 TopSetCC.getOperand(0).getValueType());
4914 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4915 TopSetCC.getOperand(1),
4916 InvCond);
4917 }
4918 }
4919 }
4920
4921 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4922 // equality or unsigned, and all 1 bits of the const are in the same
4923 // partial word, see if we can shorten the load.
4924 if (DCI.isBeforeLegalize() &&
4926 N0.getOpcode() == ISD::AND && C1 == 0 &&
4927 N0.getNode()->hasOneUse() &&
4928 isa<LoadSDNode>(N0.getOperand(0)) &&
4929 N0.getOperand(0).getNode()->hasOneUse() &&
4931 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4932 APInt bestMask;
4933 unsigned bestWidth = 0, bestOffset = 0;
4934 if (Lod->isSimple() && Lod->isUnindexed() &&
4935 (Lod->getMemoryVT().isByteSized() ||
4936 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4937 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4938 unsigned origWidth = N0.getValueSizeInBits();
4939 unsigned maskWidth = origWidth;
4940 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4941 // 8 bits, but have to be careful...
4942 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4943 origWidth = Lod->getMemoryVT().getSizeInBits();
4944 const APInt &Mask = N0.getConstantOperandAPInt(1);
4945 // Only consider power-of-2 widths (and at least one byte) as candidates
4946 // for the narrowed load.
4947 for (unsigned width = 8; width < origWidth; width *= 2) {
4948 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4949 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4950 // Avoid accessing any padding here for now (we could use memWidth
4951 // instead of origWidth here otherwise).
4952 unsigned maxOffset = origWidth - width;
4953 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4954 if (Mask.isSubsetOf(newMask)) {
4955 unsigned ptrOffset =
4956 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4957 unsigned IsFast = 0;
4958 assert((ptrOffset % 8) == 0 && "Non-Bytealigned pointer offset");
4959 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4961 ptrOffset / 8) &&
4963 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4964 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4965 IsFast) {
4966 bestOffset = ptrOffset / 8;
4967 bestMask = Mask.lshr(offset);
4968 bestWidth = width;
4969 break;
4970 }
4971 }
4972 newMask <<= 8;
4973 }
4974 if (bestWidth)
4975 break;
4976 }
4977 }
4978 if (bestWidth) {
4979 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4980 SDValue Ptr = Lod->getBasePtr();
4981 if (bestOffset != 0)
4982 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4983 SDValue NewLoad =
4984 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4985 Lod->getPointerInfo().getWithOffset(bestOffset),
4986 Lod->getBaseAlign());
4987 SDValue And =
4988 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4989 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4990 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4991 }
4992 }
4993
4994 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4995 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4996 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4997
4998 // If the comparison constant has bits in the upper part, the
4999 // zero-extended value could never match.
5001 C1.getBitWidth() - InSize))) {
5002 switch (Cond) {
5003 case ISD::SETUGT:
5004 case ISD::SETUGE:
5005 case ISD::SETEQ:
5006 return DAG.getConstant(0, dl, VT);
5007 case ISD::SETULT:
5008 case ISD::SETULE:
5009 case ISD::SETNE:
5010 return DAG.getConstant(1, dl, VT);
5011 case ISD::SETGT:
5012 case ISD::SETGE:
5013 // True if the sign bit of C1 is set.
5014 return DAG.getConstant(C1.isNegative(), dl, VT);
5015 case ISD::SETLT:
5016 case ISD::SETLE:
5017 // True if the sign bit of C1 isn't set.
5018 return DAG.getConstant(C1.isNonNegative(), dl, VT);
5019 default:
5020 break;
5021 }
5022 }
5023
5024 // Otherwise, we can perform the comparison with the low bits.
5025 switch (Cond) {
5026 case ISD::SETEQ:
5027 case ISD::SETNE:
5028 case ISD::SETUGT:
5029 case ISD::SETUGE:
5030 case ISD::SETULT:
5031 case ISD::SETULE: {
5032 EVT newVT = N0.getOperand(0).getValueType();
5033 // FIXME: Should use isNarrowingProfitable.
5034 if (DCI.isBeforeLegalizeOps() ||
5035 (isOperationLegal(ISD::SETCC, newVT) &&
5036 isCondCodeLegal(Cond, newVT.getSimpleVT()) &&
5038 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
5039 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
5040
5041 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
5042 NewConst, Cond);
5043 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
5044 }
5045 break;
5046 }
5047 default:
5048 break; // todo, be more careful with signed comparisons
5049 }
5050 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
5051 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5053 OpVT)) {
5054 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
5055 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
5056 EVT ExtDstTy = N0.getValueType();
5057 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
5058
5059 // If the constant doesn't fit into the number of bits for the source of
5060 // the sign extension, it is impossible for both sides to be equal.
5061 if (C1.getSignificantBits() > ExtSrcTyBits)
5062 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
5063
5064 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
5065 ExtDstTy != ExtSrcTy && "Unexpected types!");
5066 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
5067 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
5068 DAG.getConstant(Imm, dl, ExtDstTy));
5069 if (!DCI.isCalledByLegalizer())
5070 DCI.AddToWorklist(ZextOp.getNode());
5071 // Otherwise, make this a use of a zext.
5072 return DAG.getSetCC(dl, VT, ZextOp,
5073 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
5074 } else if ((N1C->isZero() || N1C->isOne()) &&
5075 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5076 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
5077 // excluded as they are handled below whilst checking for foldBooleans.
5078 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
5079 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
5080 (N0.getValueType() == MVT::i1 ||
5084 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
5085 if (TrueWhenTrue)
5086 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
5087 // Invert the condition.
5088 if (N0.getOpcode() == ISD::SETCC) {
5091 if (DCI.isBeforeLegalizeOps() ||
5093 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
5094 }
5095 }
5096
5097 if ((N0.getOpcode() == ISD::XOR ||
5098 (N0.getOpcode() == ISD::AND &&
5099 N0.getOperand(0).getOpcode() == ISD::XOR &&
5100 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
5101 isOneConstant(N0.getOperand(1))) {
5102 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
5103 // can only do this if the top bits are known zero.
5104 unsigned BitWidth = N0.getValueSizeInBits();
5105 if (DAG.MaskedValueIsZero(N0,
5107 BitWidth-1))) {
5108 // Okay, get the un-inverted input value.
5109 SDValue Val;
5110 if (N0.getOpcode() == ISD::XOR) {
5111 Val = N0.getOperand(0);
5112 } else {
5113 assert(N0.getOpcode() == ISD::AND &&
5114 N0.getOperand(0).getOpcode() == ISD::XOR);
5115 // ((X^1)&1)^1 -> X & 1
5116 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
5117 N0.getOperand(0).getOperand(0),
5118 N0.getOperand(1));
5119 }
5120
5121 return DAG.getSetCC(dl, VT, Val, N1,
5123 }
5124 } else if (N1C->isOne()) {
5125 SDValue Op0 = N0;
5126 if (Op0.getOpcode() == ISD::TRUNCATE)
5127 Op0 = Op0.getOperand(0);
5128
5129 if ((Op0.getOpcode() == ISD::XOR) &&
5130 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
5131 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
5132 SDValue XorLHS = Op0.getOperand(0);
5133 SDValue XorRHS = Op0.getOperand(1);
5134 // Ensure that the input setccs return an i1 type or 0/1 value.
5135 if (Op0.getValueType() == MVT::i1 ||
5140 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
5142 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
5143 }
5144 }
5145 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
5146 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
5147 if (Op0.getValueType().bitsGT(VT))
5148 Op0 = DAG.getNode(ISD::AND, dl, VT,
5149 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
5150 DAG.getConstant(1, dl, VT));
5151 else if (Op0.getValueType().bitsLT(VT))
5152 Op0 = DAG.getNode(ISD::AND, dl, VT,
5153 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
5154 DAG.getConstant(1, dl, VT));
5155
5156 return DAG.getSetCC(dl, VT, Op0,
5157 DAG.getConstant(0, dl, Op0.getValueType()),
5159 }
5160 if (Op0.getOpcode() == ISD::AssertZext &&
5161 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
5162 return DAG.getSetCC(dl, VT, Op0,
5163 DAG.getConstant(0, dl, Op0.getValueType()),
5165 }
5166 }
5167
5168 // Given:
5169 // icmp eq/ne (urem %x, %y), 0
5170 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
5171 // icmp eq/ne %x, 0
5172 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
5173 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5174 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
5175 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
5176 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
5177 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
5178 }
5179
5180 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
5181 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
5182 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5184 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
5185 N1C->isAllOnes()) {
5186 return DAG.getSetCC(dl, VT, N0.getOperand(0),
5187 DAG.getConstant(0, dl, OpVT),
5189 }
5190
5191 // fold (setcc (trunc x) c) -> (setcc x c)
5192 if (N0.getOpcode() == ISD::TRUNCATE &&
5194 (N0->getFlags().hasNoSignedWrap() &&
5197 EVT NewVT = N0.getOperand(0).getValueType();
5198 SDValue NewConst = DAG.getConstant(
5200 ? C1.sext(NewVT.getSizeInBits())
5201 : C1.zext(NewVT.getSizeInBits()),
5202 dl, NewVT);
5203 return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond);
5204 }
5205
5206 if (SDValue V =
5207 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
5208 return V;
5209 }
5210
5211 // These simplifications apply to splat vectors as well.
5212 // TODO: Handle more splat vector cases.
5213 if (auto *N1C = isConstOrConstSplat(N1)) {
5214 const APInt &C1 = N1C->getAPIntValue();
5215
5216 APInt MinVal, MaxVal;
5217 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
5219 MinVal = APInt::getSignedMinValue(OperandBitSize);
5220 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
5221 } else {
5222 MinVal = APInt::getMinValue(OperandBitSize);
5223 MaxVal = APInt::getMaxValue(OperandBitSize);
5224 }
5225
5226 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
5227 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
5228 // X >= MIN --> true
5229 if (C1 == MinVal)
5230 return DAG.getBoolConstant(true, dl, VT, OpVT);
5231
5232 if (!VT.isVector()) { // TODO: Support this for vectors.
5233 // X >= C0 --> X > (C0 - 1)
5234 APInt C = C1 - 1;
5236 if ((DCI.isBeforeLegalizeOps() ||
5237 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5238 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5239 isLegalICmpImmediate(C.getSExtValue())))) {
5240 return DAG.getSetCC(dl, VT, N0,
5241 DAG.getConstant(C, dl, N1.getValueType()),
5242 NewCC);
5243 }
5244 }
5245 }
5246
5247 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
5248 // X <= MAX --> true
5249 if (C1 == MaxVal)
5250 return DAG.getBoolConstant(true, dl, VT, OpVT);
5251
5252 // X <= C0 --> X < (C0 + 1)
5253 if (!VT.isVector()) { // TODO: Support this for vectors.
5254 APInt C = C1 + 1;
5256 if ((DCI.isBeforeLegalizeOps() ||
5257 isCondCodeLegal(NewCC, OpVT.getSimpleVT())) &&
5258 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
5259 isLegalICmpImmediate(C.getSExtValue())))) {
5260 return DAG.getSetCC(dl, VT, N0,
5261 DAG.getConstant(C, dl, N1.getValueType()),
5262 NewCC);
5263 }
5264 }
5265 }
5266
5267 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
5268 if (C1 == MinVal)
5269 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
5270
5271 // TODO: Support this for vectors after legalize ops.
5272 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5273 // Canonicalize setlt X, Max --> setne X, Max
5274 if (C1 == MaxVal)
5275 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5276
5277 // If we have setult X, 1, turn it into seteq X, 0
5278 if (C1 == MinVal+1)
5279 return DAG.getSetCC(dl, VT, N0,
5280 DAG.getConstant(MinVal, dl, N0.getValueType()),
5281 ISD::SETEQ);
5282 }
5283 }
5284
5285 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5286 if (C1 == MaxVal)
5287 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5288
5289 // TODO: Support this for vectors after legalize ops.
5290 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5291 // Canonicalize setgt X, Min --> setne X, Min
5292 if (C1 == MinVal)
5293 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5294
5295 // If we have setugt X, Max-1, turn it into seteq X, Max
5296 if (C1 == MaxVal-1)
5297 return DAG.getSetCC(dl, VT, N0,
5298 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5299 ISD::SETEQ);
5300 }
5301 }
5302
5303 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5304 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5305 if (C1.isZero())
5306 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5307 VT, N0, N1, Cond, DCI, dl))
5308 return CC;
5309
5310 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5311 // For example, when high 32-bits of i64 X are known clear:
5312 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5313 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5314 bool CmpZero = N1C->isZero();
5315 bool CmpNegOne = N1C->isAllOnes();
5316 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5317 // Match or(lo,shl(hi,bw/2)) pattern.
5318 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5319 unsigned EltBits = V.getScalarValueSizeInBits();
5320 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5321 return false;
5322 SDValue LHS = V.getOperand(0);
5323 SDValue RHS = V.getOperand(1);
5324 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5325 // Unshifted element must have zero upperbits.
5326 if (RHS.getOpcode() == ISD::SHL &&
5327 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5328 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5329 DAG.MaskedValueIsZero(LHS, HiBits)) {
5330 Lo = LHS;
5331 Hi = RHS.getOperand(0);
5332 return true;
5333 }
5334 if (LHS.getOpcode() == ISD::SHL &&
5335 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5336 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5337 DAG.MaskedValueIsZero(RHS, HiBits)) {
5338 Lo = RHS;
5339 Hi = LHS.getOperand(0);
5340 return true;
5341 }
5342 return false;
5343 };
5344
5345 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5346 unsigned EltBits = N0.getScalarValueSizeInBits();
5347 unsigned HalfBits = EltBits / 2;
5348 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5349 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5350 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5351 SDValue NewN0 =
5352 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5353 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5354 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5355 };
5356
5357 SDValue Lo, Hi;
5358 if (IsConcat(N0, Lo, Hi))
5359 return MergeConcat(Lo, Hi);
5360
5361 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5362 SDValue Lo0, Lo1, Hi0, Hi1;
5363 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5364 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5365 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5366 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5367 }
5368 }
5369 }
5370 }
5371
5372 // If we have "setcc X, C0", check to see if we can shrink the immediate
5373 // by changing cc.
5374 // TODO: Support this for vectors after legalize ops.
5375 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5376 // SETUGT X, SINTMAX -> SETLT X, 0
5377 // SETUGE X, SINTMIN -> SETLT X, 0
5378 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5379 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5380 return DAG.getSetCC(dl, VT, N0,
5381 DAG.getConstant(0, dl, N1.getValueType()),
5382 ISD::SETLT);
5383
5384 // SETULT X, SINTMIN -> SETGT X, -1
5385 // SETULE X, SINTMAX -> SETGT X, -1
5386 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5387 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5388 return DAG.getSetCC(dl, VT, N0,
5389 DAG.getAllOnesConstant(dl, N1.getValueType()),
5390 ISD::SETGT);
5391 }
5392 }
5393
5394 // Back to non-vector simplifications.
5395 // TODO: Can we do these for vector splats?
5396 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5397 const APInt &C1 = N1C->getAPIntValue();
5398 EVT ShValTy = N0.getValueType();
5399
5400 // Fold bit comparisons when we can. This will result in an
5401 // incorrect value when boolean false is negative one, unless
5402 // the bitsize is 1 in which case the false value is the same
5403 // in practice regardless of the representation.
5404 if ((VT.getSizeInBits() == 1 ||
5406 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5407 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5408 N0.getOpcode() == ISD::AND) {
5409 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5410 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5411 // Perform the xform if the AND RHS is a single bit.
5412 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5413 if (AndRHS->getAPIntValue().isPowerOf2() &&
5414 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5415 return DAG.getNode(
5416 ISD::TRUNCATE, dl, VT,
5417 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5418 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5419 }
5420 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5421 // (X & 8) == 8 --> (X & 8) >> 3
5422 // Perform the xform if C1 is a single bit.
5423 unsigned ShCt = C1.logBase2();
5424 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5425 return DAG.getNode(
5426 ISD::TRUNCATE, dl, VT,
5427 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5428 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5429 }
5430 }
5431 }
5432 }
5433
5434 if (C1.getSignificantBits() <= 64 &&
5436 // (X & -256) == 256 -> (X >> 8) == 1
5437 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5438 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5439 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5440 const APInt &AndRHSC = AndRHS->getAPIntValue();
5441 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5442 unsigned ShiftBits = AndRHSC.countr_zero();
5443 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5444 // If using an unsigned shift doesn't yield a legal compare
5445 // immediate, try using sra instead.
5446 APInt NewC = C1.lshr(ShiftBits);
5447 if (NewC.getSignificantBits() <= 64 &&
5449 APInt SignedC = C1.ashr(ShiftBits);
5450 if (SignedC.getSignificantBits() <= 64 &&
5452 SDValue Shift = DAG.getNode(
5453 ISD::SRA, dl, ShValTy, N0.getOperand(0),
5454 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5455 SDValue CmpRHS = DAG.getConstant(SignedC, dl, ShValTy);
5456 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5457 }
5458 }
5459 SDValue Shift = DAG.getNode(
5460 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5461 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5462 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5463 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5464 }
5465 }
5466 }
5467 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5468 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5469 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5470 // X < 0x100000000 -> (X >> 32) < 1
5471 // X >= 0x100000000 -> (X >> 32) >= 1
5472 // X <= 0x0ffffffff -> (X >> 32) < 1
5473 // X > 0x0ffffffff -> (X >> 32) >= 1
5474 unsigned ShiftBits;
5475 APInt NewC = C1;
5476 ISD::CondCode NewCond = Cond;
5477 if (AdjOne) {
5478 ShiftBits = C1.countr_one();
5479 NewC = NewC + 1;
5480 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5481 } else {
5482 ShiftBits = C1.countr_zero();
5483 }
5484 NewC.lshrInPlace(ShiftBits);
5485 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5487 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5488 SDValue Shift =
5489 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5490 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5491 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5492 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5493 }
5494 }
5495 }
5496 }
5497
5499 auto *CFP = cast<ConstantFPSDNode>(N1);
5500 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5501
5502 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5503 // constant if knowing that the operand is non-nan is enough. We prefer to
5504 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5505 // materialize 0.0.
5506 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5507 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5508
5509 // setcc (fneg x), C -> setcc swap(pred) x, -C
5510 if (N0.getOpcode() == ISD::FNEG) {
5512 if (DCI.isBeforeLegalizeOps() ||
5513 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5514 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5515 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5516 }
5517 }
5518
5519 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5521 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5522 bool IsFabs = N0.getOpcode() == ISD::FABS;
5523 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5524 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5525 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5526 : (IsFabs ? fcInf : fcPosInf);
5527 if (Cond == ISD::SETUEQ)
5528 Flag |= fcNan;
5529 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5530 DAG.getTargetConstant(Flag, dl, MVT::i32));
5531 }
5532 }
5533
5534 // If the condition is not legal, see if we can find an equivalent one
5535 // which is legal.
5537 // If the comparison was an awkward floating-point == or != and one of
5538 // the comparison operands is infinity or negative infinity, convert the
5539 // condition to a less-awkward <= or >=.
5540 if (CFP->getValueAPF().isInfinity()) {
5541 bool IsNegInf = CFP->getValueAPF().isNegative();
5543 switch (Cond) {
5544 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5545 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5546 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5547 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5548 default: break;
5549 }
5550 if (NewCond != ISD::SETCC_INVALID &&
5551 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5552 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5553 }
5554 }
5555 }
5556
5557 if (N0 == N1) {
5558 // The sext(setcc()) => setcc() optimization relies on the appropriate
5559 // constant being emitted.
5560 assert(!N0.getValueType().isInteger() &&
5561 "Integer types should be handled by FoldSetCC");
5562
5563 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5564 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5565 if (UOF == 2) // FP operators that are undefined on NaNs.
5566 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5567 if (UOF == unsigned(EqTrue))
5568 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5569 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5570 // if it is not already.
5571 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5572 if (NewCond != Cond &&
5573 (DCI.isBeforeLegalizeOps() ||
5574 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5575 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5576 }
5577
5578 // ~X > ~Y --> Y > X
5579 // ~X < ~Y --> Y < X
5580 // ~X < C --> X > ~C
5581 // ~X > C --> X < ~C
5582 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5583 N0.getValueType().isInteger()) {
5584 if (isBitwiseNot(N0)) {
5585 if (isBitwiseNot(N1))
5586 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5587
5590 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5591 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5592 }
5593 }
5594 }
5595
5596 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5597 N0.getValueType().isInteger()) {
5598 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5599 N0.getOpcode() == ISD::XOR) {
5600 // Simplify (X+Y) == (X+Z) --> Y == Z
5601 if (N0.getOpcode() == N1.getOpcode()) {
5602 if (N0.getOperand(0) == N1.getOperand(0))
5603 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5604 if (N0.getOperand(1) == N1.getOperand(1))
5605 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5606 if (isCommutativeBinOp(N0.getOpcode())) {
5607 // If X op Y == Y op X, try other combinations.
5608 if (N0.getOperand(0) == N1.getOperand(1))
5609 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5610 Cond);
5611 if (N0.getOperand(1) == N1.getOperand(0))
5612 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5613 Cond);
5614 }
5615 }
5616
5617 // If RHS is a legal immediate value for a compare instruction, we need
5618 // to be careful about increasing register pressure needlessly.
5619 bool LegalRHSImm = false;
5620
5621 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5622 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5623 // Turn (X+C1) == C2 --> X == C2-C1
5624 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5625 return DAG.getSetCC(
5626 dl, VT, N0.getOperand(0),
5627 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5628 dl, N0.getValueType()),
5629 Cond);
5630
5631 // Turn (X^C1) == C2 --> X == C1^C2
5632 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5633 return DAG.getSetCC(
5634 dl, VT, N0.getOperand(0),
5635 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5636 dl, N0.getValueType()),
5637 Cond);
5638 }
5639
5640 // Turn (C1-X) == C2 --> X == C1-C2
5641 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5642 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5643 return DAG.getSetCC(
5644 dl, VT, N0.getOperand(1),
5645 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5646 dl, N0.getValueType()),
5647 Cond);
5648
5649 // Could RHSC fold directly into a compare?
5650 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5651 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5652 }
5653
5654 // (X+Y) == X --> Y == 0 and similar folds.
5655 // Don't do this if X is an immediate that can fold into a cmp
5656 // instruction and X+Y has other uses. It could be an induction variable
5657 // chain, and the transform would increase register pressure.
5658 if (!LegalRHSImm || N0.hasOneUse())
5659 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5660 return V;
5661 }
5662
5663 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5664 N1.getOpcode() == ISD::XOR)
5665 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5666 return V;
5667
5668 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5669 return V;
5670
5671 if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5672 return V;
5673 }
5674
5675 // Fold remainder of division by a constant.
5676 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5677 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5678 // When division is cheap or optimizing for minimum size,
5679 // fall through to DIVREM creation by skipping this fold.
5680 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5681 if (N0.getOpcode() == ISD::UREM) {
5682 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5683 return Folded;
5684 } else if (N0.getOpcode() == ISD::SREM) {
5685 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5686 return Folded;
5687 }
5688 }
5689 }
5690
5691 // Fold away ALL boolean setcc's.
5692 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5693 SDValue Temp;
5694 switch (Cond) {
5695 default: llvm_unreachable("Unknown integer setcc!");
5696 case ISD::SETEQ: // X == Y -> ~(X^Y)
5697 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5698 N0 = DAG.getNOT(dl, Temp, OpVT);
5699 if (!DCI.isCalledByLegalizer())
5700 DCI.AddToWorklist(Temp.getNode());
5701 break;
5702 case ISD::SETNE: // X != Y --> (X^Y)
5703 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5704 break;
5705 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5706 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5707 Temp = DAG.getNOT(dl, N0, OpVT);
5708 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5709 if (!DCI.isCalledByLegalizer())
5710 DCI.AddToWorklist(Temp.getNode());
5711 break;
5712 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5713 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5714 Temp = DAG.getNOT(dl, N1, OpVT);
5715 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5716 if (!DCI.isCalledByLegalizer())
5717 DCI.AddToWorklist(Temp.getNode());
5718 break;
5719 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5720 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5721 Temp = DAG.getNOT(dl, N0, OpVT);
5722 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5723 if (!DCI.isCalledByLegalizer())
5724 DCI.AddToWorklist(Temp.getNode());
5725 break;
5726 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5727 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5728 Temp = DAG.getNOT(dl, N1, OpVT);
5729 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5730 break;
5731 }
5732 if (VT.getScalarType() != MVT::i1) {
5733 if (!DCI.isCalledByLegalizer())
5734 DCI.AddToWorklist(N0.getNode());
5735 // FIXME: If running after legalize, we probably can't do this.
5737 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5738 }
5739 return N0;
5740 }
5741
5742 // Fold (setcc (trunc x) (trunc y)) -> (setcc x y)
5743 if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE &&
5744 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
5746 N1->getFlags().hasNoUnsignedWrap()) ||
5748 N1->getFlags().hasNoSignedWrap())) &&
5750 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5751 }
5752
5753 // Fold (setcc (sub nsw a, b), zero, s??) -> (setcc a, b, s??)
5754 // TODO: Remove that .isVector() check
5755 if (VT.isVector() && isZeroOrZeroSplat(N1) && N0.getOpcode() == ISD::SUB &&
5757 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), Cond);
5758 }
5759
5760 // Could not fold it.
5761 return SDValue();
5762}
5763
5764/// Returns true (and the GlobalValue and the offset) if the node is a
5765/// GlobalAddress + offset.
     ///
     /// On success GA is set to the global that was found, and the global's own
     /// offset plus every constant addend on the way to it is added to Offset.
     /// Offset is accumulated into (+=) and never reset here, so the starting
     /// value is whatever the caller passed in.
5767 int64_t &Offset) const {
5768
     // Inspect the node returned by unwrapAddress() rather than WN itself.
5769 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5770
     // Base case: a plain global address contributes its own offset.
5771 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5772 GA = GASD->getGlobal();
5773 Offset += GASD->getOffset();
5774 return true;
5775 }
5776
     // Recursive case: one operand must itself be GA + offset and the other a
     // constant. Operand 0 is tried first, operand 1 only if that fails.
     // NOTE: when the recursive call succeeds but the remaining operand is not
     // a ConstantSDNode, control falls through to `return false` with GA and
     // Offset already updated by the recursion.
5777 if (N->isAnyAdd()) {
5778 SDValue N1 = N->getOperand(0);
5779 SDValue N2 = N->getOperand(1);
5780 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5781 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5782 Offset += V->getSExtValue();
5783 return true;
5784 }
5785 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5786 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5787 Offset += V->getSExtValue();
5788 return true;
5789 }
5790 }
5791 }
5792
5793 return false;
5794}
5795
5797 DAGCombinerInfo &DCI) const {
5798 // Default implementation: no optimization. An empty SDValue is returned
     // unconditionally and DCI is not consulted.
5799 return SDValue();
5800}
5801
5802//===----------------------------------------------------------------------===//
5803// Inline Assembler Implementation Methods
5804//===----------------------------------------------------------------------===//
5805
5808 unsigned S = Constraint.size();
5809
     // Single-letter constraints are classified by the switch below. A letter
     // that is not listed leaves the switch and ends up as C_Unknown.
5810 if (S == 1) {
5811 switch (Constraint[0]) {
5812 default: break;
5813 case 'r':
5814 return C_RegisterClass;
5815 case 'm': // memory
5816 case 'o': // offsettable
5817 case 'V': // not offsettable
5818 return C_Memory;
5819 case 'p': // Address.
5820 return C_Address;
5821 case 'n': // Simple Integer
5822 case 'E': // Floating Point Constant
5823 case 'F': // Floating Point Constant
5824 return C_Immediate;
5825 case 'i': // Simple Integer or Relocatable Constant
5826 case 's': // Relocatable Constant
5827 case 'X': // Allow ANY value.
5828 case 'I': // Target registers.
5829 case 'J':
5830 case 'K':
5831 case 'L':
5832 case 'M':
5833 case 'N':
5834 case 'O':
5835 case 'P':
5836 case '<':
5837 case '>':
5838 return C_Other;
5839 }
5840 }
5841
     // A brace-enclosed constraint is classified as a specific register, except
     // for the exact text "{memory}" (8 characters), which is treated as memory.
5842 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5843 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5844 return C_Memory;
5845 return C_Register;
5846 }
     // Everything else, including the empty string, is unrecognized.
5847 return C_Unknown;
5848}
5849
5850/// Try to replace an X constraint, which matches anything, with another that
5851/// has more specific requirements based on the type of the corresponding
5852/// operand.
     ///
     /// Returns "r" for integer types, "f" for floating-point types, and nullptr
     /// when ConstraintVT is neither, meaning no replacement is suggested.
5853const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5854 if (ConstraintVT.isInteger())
5855 return "r";
5856 if (ConstraintVT.isFloatingPoint())
5857 return "f"; // works for many targets
5858 return nullptr;
5859}
5860
5862 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5863 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
     // Default implementation: none of the arguments are used and an empty
     // SDValue is returned.
5864 return SDValue();
5865}
5866
5867/// Lower the specified operand into the Ops vector.
5868/// If it is invalid, don't add anything to Ops.
     ///
     /// Only the single-letter constraints 'X', 'i', 'n' and 's' are handled
     /// here; any other letter, and any constraint longer than one character,
     /// leaves Ops untouched.
5870 StringRef Constraint,
5871 std::vector<SDValue> &Ops,
5872 SelectionDAG &DAG) const {
5873
5874 if (Constraint.size() > 1)
5875 return;
5876
     // NOTE(review): an empty Constraint passes the size check above and would
     // reach Constraint[0]; presumably callers never pass one -- confirm.
5877 char ConstraintLetter = Constraint[0];
5878 switch (ConstraintLetter) {
5879 default: break;
5880 case 'X': // Allows any operand
5881 case 'i': // Simple Integer or Relocatable Constant
5882 case 'n': // Simple Integer
5883 case 's': { // Relocatable Constant
5884
     // Running sum of the constants peeled off ADD/SUB nodes while walking
     // down towards the symbol or constant at the bottom of the expression.
5886 uint64_t Offset = 0;
5887
5888 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5889 // etc., since getelementptr is variadic. We can't use
5890 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5891 // while in this case the GA may be furthest from the root node which is
5892 // likely an ISD::ADD.
5893 while (true) {
     // A plain constant is accepted for every letter except 's'.
5894 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5895 // gcc prints these as sign extended. Sign extend value to 64 bits
5896 // now; without this it would get ZExt'd later in
5897 // ScheduleDAGSDNodes::EmitNode, which is very generic.
     // One-bit constants are the exception: their extension kind follows the
     // boolean contents reported for MVT::i64.
5898 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5899 BooleanContent BCont = getBooleanContents(MVT::i64);
5900 ISD::NodeType ExtOpc =
5901 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5902 int64_t ExtVal =
5903 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5904 Ops.push_back(
5905 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5906 return;
5907 }
     // Symbolic operands are accepted for every letter except 'n'. The
     // accumulated Offset is folded into the emitted target node.
5908 if (ConstraintLetter != 'n') {
5909 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5910 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5911 GA->getValueType(0),
5912 Offset + GA->getOffset()));
5913 return;
5914 }
5915 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5916 Ops.push_back(DAG.getTargetBlockAddress(
5917 BA->getBlockAddress(), BA->getValueType(0),
5918 Offset + BA->getOffset(), BA->getTargetFlags()));
5919 return;
5920 }
5922 Ops.push_back(Op);
5923 return;
5924 }
5925 }
     // Otherwise peel one constant off an ADD/SUB and keep walking the
     // remaining operand; any other opcode ends the search with no match.
     // NOTE(review): for ISD::SUB only a constant in operand 0 is accepted, yet
     // it is applied with a negative sign as if the node were (X - C); confirm
     // that this is the intended handling.
5926 const unsigned OpCode = Op.getOpcode();
5927 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5928 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5929 Op = Op.getOperand(1);
5930 // Subtraction is not commutative.
5931 else if (OpCode == ISD::ADD &&
5932 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5933 Op = Op.getOperand(0);
5934 else
5935 return;
5936 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5937 continue;
5938 }
5939 return;
5940 }
5941 break;
5942 }
5943 }
5944}
5945
5949
5950std::pair<unsigned, const TargetRegisterClass *>
5952 StringRef Constraint,
5953 MVT VT) const {
     // Resolve a brace-enclosed register name ("{name}") to a physical register
     // and a register class containing it. Anything that does not start with
     // '{' yields {0, nullptr}, as does a name that no legal class contains.
5954 if (!Constraint.starts_with("{"))
5955 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
     // NOTE(review): the closing brace is only checked by this assert. A lone
     // "{" would make size() - 2 wrap around in builds without assertions;
     // presumably callers never pass that -- confirm.
5956 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5957
5958 // Remove the braces from around the name.
5959 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5960
     // Fallback result: the first class found that contains the register but
     // does not list VT as one of its legal types.
5961 std::pair<unsigned, const TargetRegisterClass *> R =
5962 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5963
5964 // Figure out which register class contains this reg.
5965 for (const TargetRegisterClass *RC : RI->regclasses()) {
5966 // If none of the value types for this register class are valid, we
5967 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5968 if (!isLegalRC(*RI, *RC))
5969 continue;
5970
     // Register names are compared case-insensitively.
5971 for (const MCPhysReg &PR : *RC) {
5972 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5973 std::pair<unsigned, const TargetRegisterClass *> S =
5974 std::make_pair(PR, RC);
5975
5976 // If this register class has the requested value type, return it,
5977 // otherwise keep searching and return the first class found
5978 // if no other is found which explicitly has the requested type.
5979 if (RI->isTypeLegalForClass(*RC, VT))
5980 return S;
5981 if (!R.second)
5982 R = S;
5983 }
5984 }
5985 }
5986
5987 return R;
5988}
5989
5990//===----------------------------------------------------------------------===//
5991// Constraint Selection.
5992
5993/// Return true if this is an input operand that is a matching constraint like
5994/// "4".
     ///
     /// Only the first character of ConstraintCode is examined; the code must
     /// not be empty (asserted).
5996 assert(!ConstraintCode.empty() && "No known constraint!");
     // The cast to unsigned char keeps a negative char value from being passed
     // to isdigit().
5997 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5998}
5999
6000/// If this is an input matching constraint, this method returns the output
6001/// operand it matches.
     ///
     /// The operand number is obtained by running atoi() over ConstraintCode, so
     /// it is the value of the code's leading decimal digits. The code must not
     /// be empty (asserted).
6003 assert(!ConstraintCode.empty() && "No known constraint!");
6004 return atoi(ConstraintCode.c_str());
6005}
6006
6007/// Split up the constraint string from the inline assembly value into the
6008/// specific constraints and their prefixes, and also tie in the associated
6009/// operand values.
6010/// If this returns an empty vector, and if the constraint string itself
6011/// isn't empty, there was an error parsing.
     ///
     /// The work is done in three passes: build one AsmOperandInfo per parsed
     /// constraint and compute its value type; if any operand has multiple
     /// alternatives, pick the alternative with the highest summed weight; and
     /// finally check that each output tied to an input is type-compatible with
     /// it, reporting a fatal error otherwise.
6014 const TargetRegisterInfo *TRI,
6015 const CallBase &Call) const {
6016 /// Information about all of the constraints.
6017 AsmOperandInfoVector ConstraintOperands;
6018 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
6019 unsigned maCount = 0; // Largest number of multiple alternative constraints.
6020
6021 // Do a prepass over the constraints, canonicalizing them, and building up the
6022 // ConstraintOperands list.
6023 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
6024 unsigned ResNo = 0; // ResNo - The result number of the next output.
6025 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
6026
6027 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
6028 ConstraintOperands.emplace_back(std::move(CI));
6029 AsmOperandInfo &OpInfo = ConstraintOperands.back();
6030
6031 // Update multiple alternative constraint count.
6032 if (OpInfo.multipleAlternatives.size() > maCount)
6033 maCount = OpInfo.multipleAlternatives.size();
6034
     // Default until a more specific type is computed below.
6035 OpInfo.ConstraintVT = MVT::Other;
6036
6037 // Compute the value type for each operand.
6038 switch (OpInfo.Type) {
6039 case InlineAsm::isOutput: {
6040 // Indirect outputs just consume an argument.
6041 if (OpInfo.isIndirect) {
6042 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6043 break;
6044 }
6045
6046 // The return value of the call is this value. As such, there is no
6047 // corresponding argument.
6048 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
6049 EVT VT;
6050 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
6051 VT = getAsmOperandValueType(DL, STy->getElementType(ResNo));
6052 } else {
6053 assert(ResNo == 0 && "Asm only has one result!");
6054 VT = getAsmOperandValueType(DL, Call.getType());
6055 }
6056 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6057 ++ResNo;
6058 break;
6059 }
6060 case InlineAsm::isInput:
6061 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
6062 break;
6063 case InlineAsm::isLabel:
6064 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
6065 ++LabelNo;
     // `continue` bypasses the type computation after the switch, so a label
     // keeps ConstraintVT == MVT::Other and never advances ArgNo.
6066 continue;
6068 // Nothing to do.
6069 break;
6070 }
6071
     // Operands backed by a call argument: derive ConstraintVT from the
     // argument's type and consume the argument.
6072 if (OpInfo.CallOperandVal) {
6073 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
6074 if (OpInfo.isIndirect) {
6075 OpTy = Call.getParamElementType(ArgNo);
6076 assert(OpTy && "Indirect operand must have elementtype attribute");
6077 }
6078
6079 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
6080 if (StructType *STy = dyn_cast<StructType>(OpTy))
6081 if (STy->getNumElements() == 1)
6082 OpTy = STy->getElementType(0);
6083
6084 // If OpTy is not a single value, it may be a struct/union that we
6085 // can tile with integers.
     // Only the exact bit sizes listed below are rewritten to an integer type;
     // any other size leaves OpTy unchanged.
6086 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
6087 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
6088 switch (BitSize) {
6089 default: break;
6090 case 1:
6091 case 8:
6092 case 16:
6093 case 32:
6094 case 64:
6095 case 128:
6096 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
6097 break;
6098 }
6099 }
6100
6101 EVT VT = getAsmOperandValueType(DL, OpTy, true);
6102 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
6103 ArgNo++;
6104 }
6105 }
6106
6107 // If we have multiple alternative constraints, select the best alternative.
6108 if (!ConstraintOperands.empty()) {
6109 if (maCount) {
     // bestWeight starts at -1 and is only replaced by a strictly greater sum,
     // so an alternative that cannot match (sum -1) is never chosen and ties
     // keep the earliest index. If every alternative fails, index 0 is used.
6110 unsigned bestMAIndex = 0;
6111 int bestWeight = -1;
6112 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
6113 int weight = -1;
6114 unsigned maIndex;
6115 // Compute the sums of the weights for each alternative, keeping track
6116 // of the best (highest weight) one so far.
6117 for (maIndex = 0; maIndex < maCount; ++maIndex) {
6118 int weightSum = 0;
6119 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6120 cIndex != eIndex; ++cIndex) {
6121 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
     // Clobbers do not take part in the weighting.
6122 if (OpInfo.Type == InlineAsm::isClobber)
6123 continue;
6124
6125 // If this is an output operand with a matching input operand,
6126 // look up the matching input. If their types mismatch, e.g. one
6127 // is an integer, the other is floating point, or their sizes are
6128 // different, flag it as an maCantMatch.
6129 if (OpInfo.hasMatchingInput()) {
6130 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6131 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6132 if ((OpInfo.ConstraintVT.isInteger() !=
6133 Input.ConstraintVT.isInteger()) ||
6134 (OpInfo.ConstraintVT.getSizeInBits() !=
6135 Input.ConstraintVT.getSizeInBits())) {
6136 weightSum = -1; // Can't match.
6137 break;
6138 }
6139 }
6140 }
     // One operand that cannot match disqualifies the whole alternative.
6141 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
6142 if (weight == -1) {
6143 weightSum = -1;
6144 break;
6145 }
6146 weightSum += weight;
6147 }
6148 // Update best.
6149 if (weightSum > bestWeight) {
6150 bestWeight = weightSum;
6151 bestMAIndex = maIndex;
6152 }
6153 }
6154
6155 // Now select chosen alternative in each constraint.
6156 for (AsmOperandInfo &cInfo : ConstraintOperands)
6157 if (cInfo.Type != InlineAsm::isClobber)
6158 cInfo.selectAlternative(bestMAIndex);
6159 }
6160 }
6161
6162 // Check and hook up tied operands, choose constraint code to use.
6163 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
6164 cIndex != eIndex; ++cIndex) {
6165 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
6166
6167 // If this is an output operand with a matching input operand, look up the
6168 // matching input. If their types mismatch, e.g. one is an integer, the
6169 // other is floating point, or their sizes are different, flag it as an
6170 // error.
6171 if (OpInfo.hasMatchingInput()) {
6172 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6173
     // Identical value types need no further check. Otherwise both operands
     // must agree on being integer-or-FP and must resolve to the same register
     // class, or compilation is aborted.
6174 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6175 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
6176 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
6177 OpInfo.ConstraintVT);
6178 std::pair<unsigned, const TargetRegisterClass *> InputRC =
6179 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
6180 Input.ConstraintVT);
6181 const bool OutOpIsIntOrFP = OpInfo.ConstraintVT.isInteger() ||
6182 OpInfo.ConstraintVT.isFloatingPoint();
6183 const bool InOpIsIntOrFP = Input.ConstraintVT.isInteger() ||
6184 Input.ConstraintVT.isFloatingPoint();
6185 if ((OutOpIsIntOrFP != InOpIsIntOrFP) ||
6186 (MatchRC.second != InputRC.second)) {
6187 report_fatal_error("Unsupported asm: input constraint"
6188 " with a matching output constraint of"
6189 " incompatible type!");
6190 }
6191 }
6192 }
6193 }
6194
6195 return ConstraintOperands;
6196}
6197
6198/// Return a number indicating our preference for choosing a type of constraint
6199/// over another, for the purpose of sorting them. Immediates are almost always
6200/// preferable (when they can be emitted). A higher return value means a
6201/// stronger preference for one constraint type relative to another.
     /// The values returned range from 0 to 4.
6202/// FIXME: We should prefer registers over memory but doing so may lead to
6203/// unrecoverable register exhaustion later.
6204/// https://github.com/llvm/llvm-project/issues/20571
6206 switch (CT) {
6209 return 4;
6212 return 3;
6214 return 2;
6216 return 1;
6218 return 0;
6219 }
     // Reached only when CT is not handled by the switch above.
6220 llvm_unreachable("Invalid constraint type");
6221}
6222
6223/// Examine constraint type and operand type and determine a weight value.
6224/// This object must already have been set up with the operand type
6225/// and the current alternative constraint selected.
     ///
     /// Returns the highest weight that getSingleConstraintMatchWeight() reports
     /// for any code of the selected alternative, or CW_Invalid if no code
     /// scores above that.
6228 AsmOperandInfo &info, int maIndex) const {
     // An maIndex beyond this operand's alternatives falls back to the
     // operand's own code list.
6230 if (maIndex >= (int)info.multipleAlternatives.size())
6231 rCodes = &info.Codes;
6232 else
6233 rCodes = &info.multipleAlternatives[maIndex].Codes;
6234 ConstraintWeight BestWeight = CW_Invalid;
6235
6236 // Loop over the options, keeping track of the most general one.
6237 for (const std::string &rCode : *rCodes) {
6238 ConstraintWeight weight =
6239 getSingleConstraintMatchWeight(info, rCode.c_str());
6240 if (weight > BestWeight)
6241 BestWeight = weight;
6242 }
6243
6244 return BestWeight;
6245}
6246
6247/// Examine constraint type and operand type and determine a weight value.
6248/// This object must already have been set up with the operand type
6249/// and the current alternative constraint selected.
     ///
     /// Only the first character of `constraint` is examined. When the operand
     /// does not fit the constraint letter, `weight` is returned with whatever
     /// initial value it was declared with (the declaration is not visible in
     /// this listing).
6252 AsmOperandInfo &info, const char *constraint) const {
6254 Value *CallOperandVal = info.CallOperandVal;
6255 // If we don't have a value, we can't do a match,
6256 // but allow it at the lowest weight.
6257 if (!CallOperandVal)
6258 return CW_Default;
6259 // Look at the constraint type.
6260 switch (*constraint) {
6261 case 'i': // immediate integer.
6262 case 'n': // immediate integer with a known value.
6263 if (isa<ConstantInt>(CallOperandVal))
6264 weight = CW_Constant;
6265 break;
6266 case 's': // non-explicit integral immediate.
6267 if (isa<GlobalValue>(CallOperandVal))
6268 weight = CW_Constant;
6269 break;
6270 case 'E': // immediate float if host format.
6271 case 'F': // immediate float.
6272 if (isa<ConstantFP>(CallOperandVal))
6273 weight = CW_Constant;
6274 break;
6275 case '<': // memory operand with autodecrement.
6276 case '>': // memory operand with autoincrement.
6277 case 'm': // memory operand.
6278 case 'o': // offsettable memory operand
6279 case 'V': // non-offsettable memory operand
     // Memory constraints are weighted without inspecting the operand.
6280 weight = CW_Memory;
6281 break;
6282 case 'r': // general register.
6283 case 'g': // general register, memory operand or immediate integer.
6284 // note: Clang converts "g" to "imr".
     // Only integer-typed operands are given register weight here.
6285 if (CallOperandVal->getType()->isIntegerTy())
6286 weight = CW_Register;
6287 break;
6288 case 'X': // any operand.
6289 default:
6290 weight = CW_Default;
6291 break;
6292 }
6293 return weight;
6294}
6295
6296/// If there are multiple different constraints that we could pick for this
6297/// operand (e.g. "imr") try to pick the 'best' one.
6298/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
6299/// into seven classes:
6300/// Register -> one specific register
6301/// RegisterClass -> a group of regs
6302/// Memory -> memory
6303/// Address -> a symbolic memory reference
6304/// Immediate -> immediate values
6305/// Other -> magic values (such as "Flag Output Operands")
6306/// Unknown -> something we don't recognize yet and can't handle
6307/// Ideally, we would pick the most specific constraint possible: if we have
6308/// something that fits into a register, we would pick it. The problem here
6309/// is that if we have something that could either be in a register or in
6310/// memory that use of the register could cause selection of *other*
6311/// operands to fail: they might only succeed if we pick memory. Because of
6312/// this the heuristic we use is:
6313///
6314/// 1) If there is an 'other' constraint, and if the operand is valid for
6315/// that constraint, use it. This makes us take advantage of 'i'
6316/// constraints when available.
6317/// 2) Otherwise, pick the most general constraint present. This prefers
6318/// 'm' over 'r', for example.
6319///
/// What the visible body does: it walks OpInfo.Codes, drops the codes that
/// the two filters below reject, records each survivor together with its
/// constraint type in a ConstraintGroup, and returns that group.
//
// NOTE(review): this listing skips lines 6320 (start of the signature), 6326,
// 6331 and 6342. `CType` is used below without a visible declaration, the
// `isIndirect` condition is missing its last operand, and the comparator body
// near the end has lost the call that consumes it. Presumably that call
// orders Ret by descending getConstraintPiority -- confirm against the real
// source.
6321 TargetLowering::AsmOperandInfo &OpInfo) const {
6322 ConstraintGroup Ret;
6323
 // At most one entry per constraint code can be produced.
6324 Ret.reserve(OpInfo.Codes.size());
6325 for (StringRef Code : OpInfo.Codes) {
6327
6328 // Indirect 'other' or 'immediate' constraints are not allowed.
6329 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6330 CType == TargetLowering::C_Register ||
6332 continue;
6333
6334 // Things with matching constraints can only be registers, per gcc
6335 // documentation. This mainly affects "g" constraints.
6336 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6337 continue;
6338
6339 Ret.emplace_back(Code, CType);
6340 }
6341
 // Comparator: an entry whose constraint type has the higher priority value
 // compares as "before" the other.
6343 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6344 });
6345
6346 return Ret;
6347}
6348
6349/// If we have an immediate, see if we can lower it. Return true if we can,
6350/// false otherwise.
///
/// The check is performed by asking \p TLI to lower \p Op under the constraint
/// code P.first and testing whether any result operand was produced. An \p Op
/// with no node is reported as not lowerable without calling into \p TLI.
//
// NOTE(review): this listing skips line 6351 (the start of the signature), so
// the declaration of `P` is not visible here; it is used as a pair whose
// `.first` is the constraint code and `.second` the constraint type.
6352 SDValue Op, SelectionDAG *DAG,
6353 const TargetLowering &TLI) {
6354
 // Callers must only pass 'other' or 'immediate' constraints.
6355 assert((P.second == TargetLowering::C_Other ||
6356 P.second == TargetLowering::C_Immediate) &&
6357 "need immediate or other");
6358
6359 if (!Op.getNode())
6360 return false;
6361
 // ResultOps is scratch storage: only its emptiness is inspected.
6362 std::vector<SDValue> ResultOps;
6363 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6364 return !ResultOps.empty();
6365}
6366
6367/// Determines the constraint code and constraint type to use for the specific
6368/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
///
/// With a single candidate code that code is used directly. With several, a
/// preference group `G` is consulted (see the loop below). If `G` is empty the
/// function returns early and leaves \p OpInfo untouched. Afterwards an "X"
/// constraint with a call operand value may be rewritten to a more specific
/// code.
//
// NOTE(review): this listing skips lines 6369 (the start of the signature)
// and 6379. `G` is used below without a visible declaration; presumably it is
// the group built from OpInfo by the preference helper -- confirm against the
// real source.
6370 SDValue Op,
6371 SelectionDAG *DAG) const {
6372 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6373
6374 // Single-letter constraints ('r') are very common.
6375 if (OpInfo.Codes.size() == 1) {
6376 OpInfo.ConstraintCode = OpInfo.Codes[0];
6377 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6378 } else {
6380 if (G.empty())
6381 return;
6382
 // Walk the leading run of C_Other / C_Immediate entries of G and stop at
 // the first one that lowerImmediateIfPossible accepts. If the loop stops
 // because the entry at BestIdx has a different type, that entry is the
 // one selected.
6383 unsigned BestIdx = 0;
6384 for (const unsigned E = G.size();
6385 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6386 G[BestIdx].second == TargetLowering::C_Immediate);
6387 ++BestIdx) {
6388 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6389 break;
6390 // If we're out of constraints, just pick the first one.
6391 if (BestIdx + 1 == E) {
6392 BestIdx = 0;
6393 break;
6394 }
6395 }
6396
6397 OpInfo.ConstraintCode = G[BestIdx].first;
6398 OpInfo.ConstraintType = G[BestIdx].second;
6399 }
6400
6401 // 'X' matches anything.
6402 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6403 // Constants are handled elsewhere. For Functions, the type here is the
6404 // type of the result, which is not what we want to look at; leave them
6405 // alone.
6406 Value *v = OpInfo.CallOperandVal;
6407 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6408 return;
6409 }
6410
 // Basic blocks and block addresses are rewritten to the "i" code. Note
 // that only ConstraintCode is updated on this path; ConstraintType keeps
 // the value chosen above.
6411 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6412 OpInfo.ConstraintCode = "i";
6413 return;
6414 }
6415
6416 // Otherwise, try to resolve it to something we know about by looking at
6417 // the actual operand type.
6418 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6419 OpInfo.ConstraintCode = Repl;
6420 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6421 }
6422 }
6423}
6424
6425/// Given an exact SDIV by a constant, create a multiplication
6426/// with the multiplicative inverse of the constant.
6427/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
///
/// Each constant divisor element is split into a count of trailing zero bits
/// (`Shift`) and the remaining value after an arithmetic right shift by that
/// count; the multiplier is the multiplicative inverse of that remaining
/// value. The result is `mul (sra exact Op0, Shift), Factor`, with the SRA
/// emitted only if some element had a non-zero shift. Returns an empty SDValue
/// when matchUnaryPredicate rejects the divisor; the lambda itself rejects a
/// zero element.
//
// NOTE(review): this listing skips line 6428 (the start of the signature), so
// the declarations of `TLI` and `N` are not visible here.
6429 const SDLoc &dl, SelectionDAG &DAG,
6430 SmallVectorImpl<SDNode *> &Created) {
6431 SDValue Op0 = N->getOperand(0);
6432 SDValue Op1 = N->getOperand(1);
6433 EVT VT = N->getValueType(0);
6434 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6435 EVT ShSVT = ShVT.getScalarType();
6436
6437 bool UseSRA = false;
6438 SmallVector<SDValue, 16> Shifts, Factors;
6439
 // Per-element callback: records one shift amount and one factor for the
 // constant C, and sets UseSRA if that element needs a shift.
6440 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6441 if (C->isZero())
6442 return false;
6443
6444 EVT CT = C->getValueType(0);
6445 APInt Divisor = C->getAPIntValue();
6446 unsigned Shift = Divisor.countr_zero();
6447 if (Shift) {
6448 Divisor.ashrInPlace(Shift);
6449 UseSRA = true;
6450 }
6451 APInt Factor = Divisor.multiplicativeInverse();
6452 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6453 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6454 return true;
6455 };
6456
6457 // Collect all magic values from the build vector.
6458 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6459 return SDValue();
6460
 // Rebuild the collected constants in the same shape as the divisor:
 // BUILD_VECTOR, SPLAT_VECTOR, or a plain scalar constant.
6461 SDValue Shift, Factor;
6462 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6463 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6464 Factor = DAG.getBuildVector(VT, dl, Factors);
6465 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6466 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6467 "Expected matchUnaryPredicate to return one element for scalable "
6468 "vectors");
6469 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6470 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6471 } else {
6472 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6473 Shift = Shifts[0];
6474 Factor = Factors[0];
6475 }
6476
6477 SDValue Res = Op0;
6478 if (UseSRA) {
6479 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, SDNodeFlags::Exact);
6480 Created.push_back(Res.getNode());
6481 }
6482
 // The final MUL is returned to the caller and is not added to Created.
6483 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6484}
6485
6486/// Given an exact UDIV by a constant, create a multiplication
6487/// with the multiplicative inverse of the constant.
6488/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
///
/// Each constant divisor element is split into a count of trailing zero bits
/// (`Shift`) and the remaining value after a logical right shift by that
/// count; the multiplier is the multiplicative inverse of that remaining
/// value. The result is `mul (srl exact Op0, Shift), Factor`, with the SRL
/// emitted only if some element had a non-zero shift. Returns an empty SDValue
/// when matchUnaryPredicate rejects the divisor; the lambda itself rejects a
/// zero element.
//
// NOTE(review): this listing skips line 6489 (the start of the signature), so
// the declarations of `TLI` and `N` are not visible here.
6490 const SDLoc &dl, SelectionDAG &DAG,
6491 SmallVectorImpl<SDNode *> &Created) {
6492 EVT VT = N->getValueType(0);
6493 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6494 EVT ShSVT = ShVT.getScalarType();
6495
6496 bool UseSRL = false;
6497 SmallVector<SDValue, 16> Shifts, Factors;
6498
 // Per-element callback: records one shift amount and one factor for the
 // constant C, and sets UseSRL if that element needs a shift.
6499 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6500 if (C->isZero())
6501 return false;
6502
6503 EVT CT = C->getValueType(0);
6504 APInt Divisor = C->getAPIntValue();
6505 unsigned Shift = Divisor.countr_zero();
6506 if (Shift) {
6507 Divisor.lshrInPlace(Shift);
6508 UseSRL = true;
6509 }
6510 // Calculate the multiplicative inverse modulo BW.
6511 APInt Factor = Divisor.multiplicativeInverse();
6512 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6513 Factors.push_back(DAG.getConstant(Factor, dl, CT));
6514 return true;
6515 };
6516
6517 SDValue Op1 = N->getOperand(1);
6518
6519 // Collect all magic values from the build vector.
6520 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6521 return SDValue();
6522
 // Rebuild the collected constants in the same shape as the divisor:
 // BUILD_VECTOR, SPLAT_VECTOR, or a plain scalar constant.
6523 SDValue Shift, Factor;
6524 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6525 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6526 Factor = DAG.getBuildVector(VT, dl, Factors);
6527 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6528 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6529 "Expected matchUnaryPredicate to return one element for scalable "
6530 "vectors");
6531 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6532 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6533 } else {
6534 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6535 Shift = Shifts[0];
6536 Factor = Factors[0];
6537 }
6538
6539 SDValue Res = N->getOperand(0);
6540 if (UseSRL) {
6541 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, SDNodeFlags::Exact);
6542 Created.push_back(Res.getNode());
6543 }
6544
 // The final MUL is returned to the caller and is not added to Created.
6545 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6546}
6547
// Default handling for a signed division, per the inline comment below.
// If isIntDivCheap reports true for the node's value type under the current
// function's attributes, the node itself (result 0) is returned, i.e. it is
// kept as a division. Otherwise an empty SDValue is returned. \p Created is
// not touched by this body.
//
// NOTE(review): this listing skips line 6548 (the start of the signature), so
// the function name and the declaration of `N` are not visible here.
6549 SelectionDAG &DAG,
6550 SmallVectorImpl<SDNode *> &Created) const {
6551 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6552 if (isIntDivCheap(N->getValueType(0), Attr))
6553 return SDValue(N, 0); // Lower SDIV as SDIV
6554 return SDValue();
6555}
6556
// Default handling for a signed remainder, per the inline comment below.
// If isIntDivCheap reports true for the node's value type under the current
// function's attributes, the node itself (result 0) is returned, i.e. it is
// kept as a remainder. Otherwise an empty SDValue is returned. \p Created is
// not touched by this body.
//
// NOTE(review): this listing skips line 6558 (the line carrying the function
// name and leading parameters), so the declaration of `N` is not visible here.
6557SDValue
6559 SelectionDAG &DAG,
6560 SmallVectorImpl<SDNode *> &Created) const {
6561 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6562 if (isIntDivCheap(N->getValueType(0), Attr))
6563 return SDValue(N, 0); // Lower SREM as SREM
6564 return SDValue();
6565}
6566
6567/// Build sdiv by power-of-2 with conditional move instructions
6568/// Ref: "Hacker's Delight" by Henry Warren 10-1
6569/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6570/// bgez x, label
6571/// add x, x, 2**k-1
6572/// label:
6573/// sra res, x, k
6574/// neg res, res (when the divisor is negative)
///
/// \p Divisor supplies k (its count of trailing zero bits) and the sign that
/// decides whether the result is negated. The nodes feeding the returned value
/// are appended to \p Created; the returned node itself is not.
//
// NOTE(review): this listing skips line 6575 (the start of the signature), so
// the function name and return type are not visible here.
6576 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6577 SmallVectorImpl<SDNode *> &Created) const {
6578 unsigned Lg2 = Divisor.countr_zero();
6579 EVT VT = N->getValueType(0);
6580
6581 SDLoc DL(N);
6582 SDValue N0 = N->getOperand(0);
6583 SDValue Zero = DAG.getConstant(0, DL, VT);
 // Mask with the low Lg2 bits set, i.e. the value 2**k - 1.
6584 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6585 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6586
6587 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6588 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6589 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6590 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
 // Select the biased value when N0 < 0, else N0 unchanged.
6591 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6592
6593 Created.push_back(Cmp.getNode());
6594 Created.push_back(Add.getNode());
6595 Created.push_back(CMov.getNode());
6596
6597 // Divide by pow2.
6598 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, CMov,
6599 DAG.getShiftAmountConstant(Lg2, VT, DL));
6600
6601 // If we're dividing by a positive value, we're done. Otherwise, we must
6602 // negate the result.
6603 if (Divisor.isNonNegative())
6604 return SRA;
6605
 // The negation is expressed as 0 - SRA.
6606 Created.push_back(SRA.getNode());
6607 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6608}
6609
6610/// Given an ISD::SDIV node expressing a divide by constant,
6611/// return a DAG expression to select that will generate the same value by
6612/// multiplying by a magic number.
6613/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// Visible flow: a node flagged 'exact' is handed to BuildExactSDIV.
/// Otherwise a magic multiplier, a numerator factor (+1, 0 or -1), a shift
/// amount and a sign-bit mask are collected per divisor element and the result
/// is assembled as
///   Q = mulhs(N0, Magic); Q = Q + N0 * Factor; Q = Q >>s Shift;
///   result = Q + ((Q >>u (EltBits - 1)) & ShiftMask).
/// An empty SDValue is returned whenever one of the legality checks or the
/// divisor match fails. Each successive value of Q, Factor and T is appended
/// to \p Created; the returned ADD is not.
//
// NOTE(review): this listing skips lines 6614 (start of the signature), 6640,
// 6649, 6670-6671 and 6699. Three `if` conditions below are therefore shown
// without their final operands, and `magics` is used without a visible
// declaration -- confirm against the real source before editing.
6615 bool IsAfterLegalization,
6616 bool IsAfterLegalTypes,
6617 SmallVectorImpl<SDNode *> &Created) const {
6618 SDLoc dl(N);
6619
6620 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6621 if (N->getFlags().hasExact())
6622 return BuildExactSDIV(*this, N, dl, DAG, Created);
6623
6624 EVT VT = N->getValueType(0);
6625 EVT SVT = VT.getScalarType();
6626 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6627 EVT ShSVT = ShVT.getScalarType();
6628 unsigned EltBits = VT.getScalarSizeInBits();
 // MulVT stays default-constructed unless a wider multiply type is chosen
 // below; the code tests for that with `MulVT == EVT()`.
6629 EVT MulVT;
6630
6631 // Check to see if we can do this.
6632 // FIXME: We should be more aggressive here.
6633 EVT QueryVT = VT;
6634 if (VT.isVector()) {
6635 // If the vector type will be legalized to a vector type with the same
6636 // element type, allow the transform before type legalization if MULHS or
6637 // SMUL_LOHI are supported.
6638 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6639 if (!QueryVT.isVector() ||
6641 return SDValue();
6642 } else if (!isTypeLegal(VT)) {
6643 // Limit this to simple scalars for now.
6644 if (!VT.isSimple())
6645 return SDValue();
6646
6647 // If this type will be promoted to a large enough type with a legal
6648 // multiply operation, we can go ahead and do this transform.
6650 return SDValue();
6651
6652 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6653 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6654 !isOperationLegal(ISD::MUL, MulVT))
6655 return SDValue();
6656 }
6657
6658 bool HasMULHS =
6659 isOperationLegalOrCustom(ISD::MULHS, QueryVT, IsAfterLegalization);
6660 bool HasSMUL_LOHI =
6661 isOperationLegalOrCustom(ISD::SMUL_LOHI, QueryVT, IsAfterLegalization);
6662
6663 if (isTypeLegal(VT) && !HasMULHS && !HasSMUL_LOHI && MulVT == EVT()) {
6664 // If type twice as wide legal, widen and use a mul plus a shift.
6665 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6666 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6667 // custom lowered. This is very expensive so avoid it at all costs for
6668 // constant divisors.
6669 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6672 MulVT = WideVT;
6673 }
6674
 // No high-multiply operation and no wider multiply type: give up.
6675 if (!HasMULHS && !HasSMUL_LOHI && MulVT == EVT())
6676 return SDValue();
6677
6678 // If we're after type legalization and SVT is not legal, use the
6679 // promoted type for creating constants to avoid creating nodes with
6680 // illegal types.
6681 if (IsAfterLegalTypes && VT.isVector()) {
6682 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6683 if (SVT.bitsLT(VT.getScalarType()))
6684 return SDValue();
6685 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6686 if (ShSVT.bitsLT(ShVT.getScalarType()))
6687 return SDValue();
6688 }
6689 const unsigned SVTBits = SVT.getSizeInBits();
6690
6691 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6692
 // Per-element callback: pushes exactly one entry onto each of the four
 // vectors above, or returns false for a zero divisor element.
6693 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6694 if (C->isZero())
6695 return false;
6696 // Truncate the divisor to the target scalar type in case it was promoted
6697 // during type legalization.
6698 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6700 int NumeratorFactor = 0;
6701 int ShiftMask = -1;
6702
6703 if (Divisor.isOne() || Divisor.isAllOnes()) {
6704 // If d is +1/-1, we just multiply the numerator by +1/-1.
6705 NumeratorFactor = Divisor.getSExtValue();
6706 magics.Magic = 0;
6707 magics.ShiftAmount = 0;
6708 ShiftMask = 0;
6709 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6710 // If d > 0 and m < 0, add the numerator.
6711 NumeratorFactor = 1;
6712 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6713 // If d < 0 and m > 0, subtract the numerator.
6714 NumeratorFactor = -1;
6715 }
6716
6717 MagicFactors.push_back(
6718 DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT));
6719 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6720 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6721 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6722 return true;
6723 };
6724
6725 SDValue N0 = N->getOperand(0);
6726 SDValue N1 = N->getOperand(1);
6727
6728 // Collect the shifts / magic values from each element.
6729 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern, /*AllowUndefs=*/false,
6730 /*AllowTruncation=*/true))
6731 return SDValue();
6732
 // Rebuild the collected constants in the same shape as the divisor:
 // BUILD_VECTOR, SPLAT_VECTOR, or a plain scalar constant.
6733 SDValue MagicFactor, Factor, Shift, ShiftMask;
6734 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6735 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6736 Factor = DAG.getBuildVector(VT, dl, Factors);
6737 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6738 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6739 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6740 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6741 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6742 "Expected matchUnaryPredicate to return one element for scalable "
6743 "vectors");
6744 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6745 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6746 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6747 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6748 } else {
6749 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6750 MagicFactor = MagicFactors[0];
6751 Factor = Factors[0];
6752 Shift = Shifts[0];
6753 ShiftMask = ShiftMasks[0];
6754 }
6755
6756 // Multiply the numerator (operand 0) by the magic value.
 // Three strategies, in order of preference: MULHS, the high result of
 // SMUL_LOHI, or a sign-extended multiply in MulVT followed by a right
 // shift by EltBits and a truncate back to VT.
6757 auto GetMULHS = [&](SDValue X, SDValue Y) {
6758 if (HasMULHS)
6759 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6760 if (HasSMUL_LOHI) {
6761 SDValue LoHi =
6762 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6763 return LoHi.getValue(1);
6764 }
6765
6766 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6767 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6768 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6769 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6770 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6771 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6772 };
6773
6774 SDValue Q = GetMULHS(N0, MagicFactor);
6775 if (!Q)
6776 return SDValue();
6777
6778 Created.push_back(Q.getNode());
6779
6780 // (Optionally) Add/subtract the numerator using Factor.
6781 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6782 Created.push_back(Factor.getNode());
6783 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6784 Created.push_back(Q.getNode());
6785
6786 // Shift right algebraic by shift value.
6787 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6788 Created.push_back(Q.getNode());
6789
6790 // Extract the sign bit, mask it and add it to the quotient.
6791 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6792 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6793 Created.push_back(T.getNode());
6794 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6795 Created.push_back(T.getNode());
6796 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6797}
6798
6799/// Given an ISD::UDIV node expressing a divide by constant,
6800/// return a DAG expression to select that will generate the same value by
6801/// multiplying by a magic number.
6802/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
///
/// Visible flow: a node flagged 'exact' is handed to BuildExactUDIV.
/// Otherwise per-element pre-shift, magic multiplier, NPQ factor and
/// post-shift constants are collected. If any element asked for widening, the
/// result is the truncated high half of a 64-bit multiply of the
/// zero-extended numerator. Otherwise the result is built from an optional
/// pre-shift, a high multiply, an optional NPQ fix-up and an optional
/// post-shift, and finally a select that yields the numerator unchanged where
/// the divisor equals 1. An empty SDValue is returned whenever one of the
/// legality checks or the divisor match fails.
//
// NOTE(review): this listing skips lines 6803 (start of the signature), 6829,
// 6838, 6859-6860 and 6917-6918. Three `if` conditions below are therefore
// shown without their final operands, and `magics` is used without a visible
// declaration (only the trailing arguments of the call that initialises it
// survive) -- confirm against the real source before editing.
6804 bool IsAfterLegalization,
6805 bool IsAfterLegalTypes,
6806 SmallVectorImpl<SDNode *> &Created) const {
6807 SDLoc dl(N);
6808
6809 // If the udiv has an 'exact' bit we can use a simpler lowering.
6810 if (N->getFlags().hasExact())
6811 return BuildExactUDIV(*this, N, dl, DAG, Created);
6812
6813 EVT VT = N->getValueType(0);
6814 EVT SVT = VT.getScalarType();
6815 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6816 EVT ShSVT = ShVT.getScalarType();
6817 unsigned EltBits = VT.getScalarSizeInBits();
 // MulVT stays default-constructed unless a wider multiply type is chosen
 // below; the code tests for that with `MulVT == EVT()`.
6818 EVT MulVT;
6819
6820 // Check to see if we can do this.
6821 // FIXME: We should be more aggressive here.
6822 EVT QueryVT = VT;
6823 if (VT.isVector()) {
6824 // If the vector type will be legalized to a vector type with the same
6825 // element type, allow the transform before type legalization if MULHU or
6826 // UMUL_LOHI are supported.
6827 QueryVT = getLegalTypeToTransformTo(*DAG.getContext(), VT);
6828 if (!QueryVT.isVector() ||
6830 return SDValue();
6831 } else if (!isTypeLegal(VT)) {
6832 // Limit this to simple scalars for now.
6833 if (!VT.isSimple())
6834 return SDValue();
6835
6836 // If this type will be promoted to a large enough type with a legal
6837 // multiply operation, we can go ahead and do this transform.
6839 return SDValue();
6840
6841 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6842 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6843 !isOperationLegal(ISD::MUL, MulVT))
6844 return SDValue();
6845 }
6846
6847 bool HasMULHU =
6848 isOperationLegalOrCustom(ISD::MULHU, QueryVT, IsAfterLegalization);
6849 bool HasUMUL_LOHI =
6850 isOperationLegalOrCustom(ISD::UMUL_LOHI, QueryVT, IsAfterLegalization);
6851
6852 if (isTypeLegal(VT) && !HasMULHU && !HasUMUL_LOHI && MulVT == EVT()) {
6853 // If type twice as wide legal, widen and use a mul plus a shift.
6854 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
6855 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6856 // custom lowered. This is very expensive so avoid it at all costs for
6857 // constant divisors.
6858 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6861 MulVT = WideVT;
6862 }
6863
 // No high-multiply operation and no wider multiply type: give up.
6864 if (!HasMULHU && !HasUMUL_LOHI && MulVT == EVT())
6865 return SDValue();
6866
6867 SDValue N0 = N->getOperand(0);
6868 SDValue N1 = N->getOperand(1);
6869
6870 // Try to use leading zeros of the dividend to reduce the multiplier and
6871 // avoid expensive fixups.
6872 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6873
6874 // If we're after type legalization and SVT is not legal, use the
6875 // promoted type for creating constants to avoid creating nodes with
6876 // illegal types.
6877 if (IsAfterLegalTypes && VT.isVector()) {
6878 SVT = getTypeToTransformTo(*DAG.getContext(), SVT);
6879 if (SVT.bitsLT(VT.getScalarType()))
6880 return SDValue();
6881 ShSVT = getTypeToTransformTo(*DAG.getContext(), ShSVT);
6882 if (ShSVT.bitsLT(ShVT.getScalarType()))
6883 return SDValue();
6884 }
6885 const unsigned SVTBits = SVT.getSizeInBits();
6886
6887 // Allow i32 to be widened to i64 for uncooperative divisors if i64 MULHU or
6888 // UMUL_LOHI is supported.
6889 const EVT WideSVT = MVT::i64;
6890 const bool HasWideMULHU =
6891 VT == MVT::i32 &&
6892 isOperationLegalOrCustom(ISD::MULHU, WideSVT, IsAfterLegalization);
6893 const bool HasWideUMUL_LOHI =
6894 VT == MVT::i32 &&
6895 isOperationLegalOrCustom(ISD::UMUL_LOHI, WideSVT, IsAfterLegalization);
6896 const bool AllowWiden = (HasWideMULHU || HasWideUMUL_LOHI);
6897
 // These flags are OR-ed together across all divisor elements, so a step is
 // emitted if at least one element needs it.
6898 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6899 bool UseWiden = false;
6900 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6901
 // Per-element callback: pushes exactly one entry onto each of the four
 // vectors above, or returns false for a zero divisor element.
6902 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6903 if (C->isZero())
6904 return false;
6905 // Truncate the divisor to the target scalar type in case it was promoted
6906 // during type legalization.
6907 APInt Divisor = C->getAPIntValue().trunc(EltBits);
6908
6909 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6910
6911 // Magic algorithm doesn't work for division by 1. We need to emit a select
6912 // at the end.
6913 if (Divisor.isOne()) {
6914 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6915 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6916 } else {
6919 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()),
6920 /*AllowEvenDivisorOptimization=*/true,
6921 /*AllowWidenOptimization=*/AllowWiden);
6922
6923 if (magics.Widen) {
6924 UseWiden = true;
6925 MagicFactor = DAG.getConstant(magics.Magic, dl, WideSVT);
6926 } else {
6927 MagicFactor = DAG.getConstant(magics.Magic.zext(SVTBits), dl, SVT);
6928 }
6929
6930 assert(magics.PreShift < Divisor.getBitWidth() &&
6931 "We shouldn't generate an undefined shift!");
6932 assert(magics.PostShift < Divisor.getBitWidth() &&
6933 "We shouldn't generate an undefined shift!");
6934 assert((!magics.IsAdd || magics.PreShift == 0) &&
6935 "Unexpected pre-shift");
6936 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6937 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
 // NPQ factor: only bit EltBits-1 set when the add fix-up is needed,
 // zero otherwise.
6938 NPQFactor = DAG.getConstant(
6939 magics.IsAdd ? APInt::getOneBitSet(SVTBits, EltBits - 1)
6940 : APInt::getZero(SVTBits),
6941 dl, SVT);
6942 UseNPQ |= magics.IsAdd;
6943 UsePreShift |= magics.PreShift != 0;
6944 UsePostShift |= magics.PostShift != 0;
6945 }
6946
6947 PreShifts.push_back(PreShift);
6948 MagicFactors.push_back(MagicFactor);
6949 NPQFactors.push_back(NPQFactor);
6950 PostShifts.push_back(PostShift);
6951 return true;
6952 };
6953
6954 // Collect the shifts/magic values from each element.
6955 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern, /*AllowUndefs=*/false,
6956 /*AllowTruncation=*/true))
6957 return SDValue();
6958
 // Rebuild the collected constants in the same shape as the divisor. In the
 // scalar branch NPQFactor is deliberately left unset: it is only read
 // further down when VT is a vector.
6959 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6960 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6961 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6962 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6963 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6964 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6965 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6966 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6967 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6968 "Expected matchUnaryPredicate to return one for scalable vectors");
6969 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6970 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6971 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6972 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6973 } else {
6974 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6975 PreShift = PreShifts[0];
6976 MagicFactor = MagicFactors[0];
6977 PostShift = PostShifts[0];
6978 }
6979
 // Widened path: returns early and skips the pre-shift / NPQ / post-shift /
 // select steps below. Only the high-half node is added to Created.
6980 if (UseWiden) {
6981 // Compute: (WideSVT(x) * MagicFactor) >> WideSVTBits.
6982 SDValue WideN0 = DAG.getNode(ISD::ZERO_EXTEND, dl, WideSVT, N0);
6983
6984 // Perform WideSVTxWideSVT -> 2*WideSVT multiplication and extract high
6985 // WideSVT bits
6986 SDValue High;
6987 if (HasWideMULHU) {
6988 High = DAG.getNode(ISD::MULHU, dl, WideSVT, WideN0, MagicFactor);
6989 } else {
6990 assert(HasWideUMUL_LOHI);
6991 SDValue LoHi =
6992 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(WideSVT, WideSVT),
6993 WideN0, MagicFactor);
6994 High = LoHi.getValue(1);
6995 }
6996
6997 Created.push_back(High.getNode());
6998 return DAG.getNode(ISD::TRUNCATE, dl, VT, High);
6999 }
7000
7001 SDValue Q = N0;
7002 if (UsePreShift) {
7003 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
7004 Created.push_back(Q.getNode());
7005 }
7006
 // Three strategies, in order of preference: MULHU, the high result of
 // UMUL_LOHI, or a zero-extended multiply in MulVT followed by a right
 // shift by EltBits and a truncate back to VT.
7007 auto GetMULHU = [&](SDValue X, SDValue Y) {
7008 if (HasMULHU)
7009 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
7010 if (HasUMUL_LOHI) {
7011 SDValue LoHi =
7012 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
7013 return LoHi.getValue(1);
7014 }
7015
7016 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
7017 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
7018 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
7019 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
7020 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
7021 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
7022 };
7023
7024 // Multiply the numerator (operand 0) by the magic value.
7025 Q = GetMULHU(Q, MagicFactor);
7026 if (!Q)
7027 return SDValue();
7028
7029 Created.push_back(Q.getNode());
7030
7031 if (UseNPQ) {
7032 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
7033 Created.push_back(NPQ.getNode());
7034
7035 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
7036 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
7037 if (VT.isVector())
7038 NPQ = GetMULHU(NPQ, NPQFactor);
7039 else
7040 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
7041
7042 Created.push_back(NPQ.getNode());
7043
7044 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
7045 Created.push_back(Q.getNode());
7046 }
7047
7048 if (UsePostShift) {
7049 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
7050 Created.push_back(Q.getNode());
7051 }
7052
7053 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7054
 // Division by 1 was given undef constants above, so pick the numerator
 // itself wherever the divisor equals 1 and the computed Q elsewhere.
7055 SDValue One = DAG.getConstant(1, dl, VT);
7056 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
7057 return DAG.getSelect(dl, VT, IsOne, N0, Q);
7058}
7059
7060/// If all values in Values that *don't* match the predicate are same 'splat'
7061/// value, then replace all values with that splat value.
7062/// Else, if AlternativeReplacement was provided, then replace all values that
7063/// do match predicate with AlternativeReplacement value.
///
/// In both cases only the elements that satisfy \p Predicate are overwritten
/// (via std::replace_if); \p Values is modified in place. If no common
/// non-matching value exists and no \p AlternativeReplacement is given,
/// \p Values is left unchanged.
//
// NOTE(review): this listing skips line 7065 (the line carrying the function
// name and the declaration of `Values`), so the exact type of `Values` is not
// visible here.
7064static void
7066 std::function<bool(SDValue)> Predicate,
7067 SDValue AlternativeReplacement = SDValue()) {
7068 SDValue Replacement;
7069 // Is there a value for which the Predicate does *NOT* match? What is it?
7070 auto SplatValue = llvm::find_if_not(Values, Predicate);
7071 if (SplatValue != Values.end()) {
7072 // Does Values consist only of SplatValue's and values matching Predicate?
7073 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
7074 return Value == *SplatValue || Predicate(Value);
7075 })) // Then we shall replace values matching predicate with SplatValue.
7076 Replacement = *SplatValue;
7077 }
 // Replacement is still empty if every value matched the predicate, or if
 // the non-matching values were not all identical.
7078 if (!Replacement) {
7079 // Oops, we did not find the "baseline" splat value.
7080 if (!AlternativeReplacement)
7081 return; // Nothing to do.
7082 // Let's replace with provided value then.
7083 Replacement = AlternativeReplacement;
7084 }
7085 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
7086}
7087
7088/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
7089/// where the divisor and comparison target are constants,
7090/// return a DAG expression that will generate the same comparison result
7091/// using only multiplications, additions and shifts/rotations.
7092/// Ref: "Hacker's Delight" 10-17.
///
/// This is a thin wrapper: the fold itself is produced by prepareUREMEqFold.
/// On success every node it reported in `Built` is queued on the combiner
/// worklist and the folded value is returned; on failure an empty SDValue is
/// returned and nothing is queued.
//
// NOTE(review): this listing skips lines 7095 and 7098. `Cond` and `Built`
// are used below without visible declarations; presumably `Cond` is the
// missing parameter and `Built` a local node list -- confirm against the real
// source.
7093SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
7094 SDValue CompTargetNode,
7096 DAGCombinerInfo &DCI,
7097 const SDLoc &DL) const {
7099 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7100 DCI, DL, Built)) {
7101 for (SDNode *N : Built)
7102 DCI.AddToWorklist(N);
7103 return Folded;
7104 }
7105
7106 return SDValue();
7107}
7108
7109SDValue
7110TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
7111 SDValue CompTargetNode, ISD::CondCode Cond,
7112 DAGCombinerInfo &DCI, const SDLoc &DL,
7113 SmallVectorImpl<SDNode *> &Created) const {
7114 // fold (seteq/ne (urem N, D), C) ->
7115 // (setule/ugt (rotr (mul (sub N, C), P), K), Q)
7116 // - D must be constant, with D = D0 * 2^K where D0 is odd
7117 // - P is the multiplicative inverse of D0 modulo 2^W
7118 // - Q = floor(((2^W) - 1) / D)
7119 // where W is the width of the common type of N and D.
7120 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7121 "Only applicable for (in)equality comparisons.");
7122
7123 SelectionDAG &DAG = DCI.DAG;
7124
7125 EVT VT = REMNode.getValueType();
7126 EVT SVT = VT.getScalarType();
7127 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7128 EVT ShSVT = ShVT.getScalarType();
7129
7130 // If MUL is unavailable, we cannot proceed in any case.
7131 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7132 return SDValue();
7133
7134 bool ComparingWithAllZeros = true;
7135 bool AllComparisonsWithNonZerosAreTautological = true;
7136 bool HadTautologicalLanes = false;
7137 bool AllLanesAreTautological = true;
7138 bool HadEvenDivisor = false;
7139 bool AllDivisorsArePowerOfTwo = true;
7140 bool HadTautologicalInvertedLanes = false;
7141 SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
7142
7143 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
7144 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7145 if (CDiv->isZero())
7146 return false;
7147
7148 const APInt &D = CDiv->getAPIntValue();
7149 const APInt &Cmp = CCmp->getAPIntValue();
7150
7151 ComparingWithAllZeros &= Cmp.isZero();
7152
7153 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7154 // if C2 is not less than C1, the comparison is always false.
7155 // But we will only be able to produce the comparison that will give the
7156 // opposite tautological answer. So this lane would need to be fixed up.
7157 bool TautologicalInvertedLane = D.ule(Cmp);
7158 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
7159
7160 // If all lanes are tautological (either all divisors are ones, or divisor
7161 // is not greater than the constant we are comparing with),
7162 // we will prefer to avoid the fold.
7163 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
7164 HadTautologicalLanes |= TautologicalLane;
7165 AllLanesAreTautological &= TautologicalLane;
7166
7167 // If we are comparing with non-zero, we'll need to subtract said
7168 // comparison value from the LHS. But there is no point in doing that if
7169 // every lane where we are comparing with non-zero is tautological.
7170 if (!Cmp.isZero())
7171 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
7172
7173 // Decompose D into D0 * 2^K
7174 unsigned K = D.countr_zero();
7175 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7176 APInt D0 = D.lshr(K);
7177
7178 // D is even if it has trailing zeros.
7179 HadEvenDivisor |= (K != 0);
7180 // D is a power-of-two if D0 is one.
7181 // If all divisors are power-of-two, we will prefer to avoid the fold.
7182 AllDivisorsArePowerOfTwo &= D0.isOne();
7183
7184 // P = inv(D0, 2^W)
7185 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7186 unsigned W = D.getBitWidth();
7187 APInt P = D0.multiplicativeInverse();
7188 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7189
7190 // Q = floor((2^W - 1) u/ D)
7191 // R = ((2^W - 1) u% D)
7192 APInt Q, R;
7194
7195 // If we are comparing with zero, then that comparison constant is okay,
7196 // else it may need to be one less than that.
7197 if (Cmp.ugt(R))
7198 Q -= 1;
7199
7201 "We are expecting that K is always less than all-ones for ShSVT");
7202
7203 // If the lane is tautological the result can be constant-folded.
7204 if (TautologicalLane) {
7205 // Set P and K amount to bogus values so we can try to splat them.
7206 P = 0;
7207 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7208 // And ensure that comparison constant is tautological,
7209 // it will always compare true/false.
7210 Q.setAllBits();
7211 } else {
7212 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7213 }
7214
7215 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7216 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7217 return true;
7218 };
7219
7220 SDValue N = REMNode.getOperand(0);
7221 SDValue D = REMNode.getOperand(1);
7222
7223 // Collect the values from each element.
7224 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
7225 return SDValue();
7226
7227 // If all lanes are tautological, the result can be constant-folded.
7228 if (AllLanesAreTautological)
7229 return SDValue();
7230
7231 // If this is a urem by a power of two, avoid the fold since it can be
7232 // best implemented as a bit test.
7233 if (AllDivisorsArePowerOfTwo)
7234 return SDValue();
7235
7236 SDValue PVal, KVal, QVal;
7237 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7238 if (HadTautologicalLanes) {
7239 // Try to turn PAmts into a splat, since we don't care about the values
7240 // that are currently '0'. If we can't, just keep '0'`s.
7242 // Try to turn KAmts into a splat, since we don't care about the values
7243 // that are currently '-1'. If we can't, change them to '0'`s.
7245 DAG.getConstant(0, DL, ShSVT));
7246 }
7247
7248 PVal = DAG.getBuildVector(VT, DL, PAmts);
7249 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7250 QVal = DAG.getBuildVector(VT, DL, QAmts);
7251 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7252 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
7253 "Expected matchBinaryPredicate to return one element for "
7254 "SPLAT_VECTORs");
7255 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7256 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7257 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7258 } else {
7259 PVal = PAmts[0];
7260 KVal = KAmts[0];
7261 QVal = QAmts[0];
7262 }
7263
7264 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
7265 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
7266 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
7267 assert(CompTargetNode.getValueType() == N.getValueType() &&
7268 "Expecting that the types on LHS and RHS of comparisons match.");
7269 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
7270 }
7271
7272 // (mul N, P)
7273 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7274 Created.push_back(Op0.getNode());
7275
7276 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7277 // divisors as a performance improvement, since rotating by 0 is a no-op.
7278 if (HadEvenDivisor) {
7279 // We need ROTR to do this.
7280 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7281 return SDValue();
7282 // UREM: (rotr (mul N, P), K)
7283 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7284 Created.push_back(Op0.getNode());
7285 }
7286
7287 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
7288 SDValue NewCC =
7289 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7291 if (!HadTautologicalInvertedLanes)
7292 return NewCC;
7293
7294 // If any lanes previously compared always-false, the NewCC will give
7295 // always-true result for them, so we need to fixup those lanes.
7296 // Or the other way around for inequality predicate.
7297 assert(VT.isVector() && "Can/should only get here for vectors.");
7298 Created.push_back(NewCC.getNode());
7299
7300 // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
7301 // if C2 is not less than C1, the comparison is always false.
7302 // But we have produced the comparison that will give the
7303 // opposite tautological answer. So these lanes would need to be fixed up.
7304 SDValue TautologicalInvertedChannels =
7305 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
7306 Created.push_back(TautologicalInvertedChannels.getNode());
7307
7308 // NOTE: we avoid letting illegal types through even if we're before legalize
7309 // ops – legalization has a hard time producing good code for this.
7310 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
7311 // If we have a vector select, let's replace the comparison results in the
7312 // affected lanes with the correct tautological result.
7313 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
7314 DL, SETCCVT, SETCCVT);
7315 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
7316 Replacement, NewCC);
7317 }
7318
7319 // Else, we can just invert the comparison result in the appropriate lanes.
7320 //
7321 // NOTE: see the note above VSELECT above.
7322 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
7323 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
7324 TautologicalInvertedChannels);
7325
7326 return SDValue(); // Don't know how to lower.
7327}
7328
7329/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
7330/// where the divisor is constant and the comparison target is zero,
7331/// return a DAG expression that will generate the same comparison result
7332/// using only multiplications, additions and shifts/rotations.
7333/// Ref: "Hacker's Delight" 10-17.
7334SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
7335 SDValue CompTargetNode,
7337 DAGCombinerInfo &DCI,
7338 const SDLoc &DL) const {
7340 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
7341 DCI, DL, Built)) {
7342 assert(Built.size() <= 7 && "Max size prediction failed.");
7343 for (SDNode *N : Built)
7344 DCI.AddToWorklist(N);
7345 return Folded;
7346 }
7347
7348 return SDValue();
7349}
7350
7351SDValue
7352TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
7353 SDValue CompTargetNode, ISD::CondCode Cond,
7354 DAGCombinerInfo &DCI, const SDLoc &DL,
7355 SmallVectorImpl<SDNode *> &Created) const {
7356 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
7357 // Fold:
7358 // (seteq/ne (srem N, D), 0)
7359 // To:
7360 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
7361 //
7362 // - D must be constant, with D = D0 * 2^K where D0 is odd
7363 // - P is the multiplicative inverse of D0 modulo 2^W
7364 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
7365 // - Q = floor((2 * A) / (2^K))
7366 // where W is the width of the common type of N and D.
7367 //
7368 // When D is a power of two (and thus D0 is 1), the normal
7369 // formula for A and Q don't apply, because the derivation
7370 // depends on D not dividing 2^(W-1), and thus theorem ZRS
7371 // does not apply. This specifically fails when N = INT_MIN.
7372 //
7373 // Instead, for power-of-two D, we use:
7374 // - A = 0
7375 // | -> No offset needed. We're effectively treating it the same as urem.
7376 // - Q = 2^(W-K) - 1
7377 // |-> Test that the top K bits are zero after rotation
7378 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
7379 "Only applicable for (in)equality comparisons.");
7380
7381 SelectionDAG &DAG = DCI.DAG;
7382
7383 EVT VT = REMNode.getValueType();
7384 EVT SVT = VT.getScalarType();
7385 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
7386 EVT ShSVT = ShVT.getScalarType();
7387
7388 // If we are after ops legalization, and MUL is unavailable, we can not
7389 // proceed.
7390 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
7391 return SDValue();
7392
7393 // TODO: Could support comparing with non-zero too.
7394 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
7395 if (!CompTarget || !CompTarget->isZero())
7396 return SDValue();
7397
7398 bool HadOneDivisor = false;
7399 bool AllDivisorsAreOnes = true;
7400 bool HadEvenDivisor = false;
7401 bool AllDivisorsArePowerOfTwo = true;
7402 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7403
7404 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7405 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7406 if (C->isZero())
7407 return false;
7408
7409 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7410
7411 // WARNING: this fold is only valid for positive divisors!
7412 // `rem %X, -C` is equivalent to `rem %X, C`
7413 APInt D = C->getAPIntValue().abs();
7414
7415 // If all divisors are ones, we will prefer to avoid the fold.
7416 HadOneDivisor |= D.isOne();
7417 AllDivisorsAreOnes &= D.isOne();
7418
7419 // Decompose D into D0 * 2^K
7420 unsigned K = D.countr_zero();
7421 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7422 APInt D0 = D.lshr(K);
7423
7424 // D is even if it has trailing zeros.
7425 HadEvenDivisor |= (K != 0);
7426
7427 // D is a power-of-two if D0 is one. This includes INT_MIN.
7428 // If all divisors are power-of-two, we will prefer to avoid the fold.
7429 AllDivisorsArePowerOfTwo &= D0.isOne();
7430
7431 // P = inv(D0, 2^W)
7432 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7433 unsigned W = D.getBitWidth();
7434 APInt P = D0.multiplicativeInverse();
7435 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7436
7437 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7438 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
7439 A.clearLowBits(K);
7440
7441 // Q = floor((2 * A) / (2^K))
7442 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7443
7445 "We are expecting that A is always less than all-ones for SVT");
7447 "We are expecting that K is always less than all-ones for ShSVT");
7448
7449 // If D was a power of two, apply the alternate constant derivation.
7450 if (D0.isOne()) {
7451 // A = 0
7452 A = APInt(W, 0);
7453 // - Q = 2^(W-K) - 1
7454 Q = APInt::getLowBitsSet(W, W - K);
7455 }
7456
7457 // If the divisor is 1 the result can be constant-folded.
7458 if (D.isOne()) {
7459 // Set P, A and K to a bogus values so we can try to splat them.
7460 P = 0;
7461 A.setAllBits();
7462 KAmts.push_back(DAG.getAllOnesConstant(DL, ShSVT));
7463
7464 // x ?% 1 == 0 <--> true <--> x u<= -1
7465 Q.setAllBits();
7466 } else {
7467 KAmts.push_back(DAG.getConstant(K, DL, ShSVT));
7468 }
7469
7470 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7471 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7472 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7473 return true;
7474 };
7475
7476 SDValue N = REMNode.getOperand(0);
7477 SDValue D = REMNode.getOperand(1);
7478
7479 // Collect the values from each element.
7480 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7481 return SDValue();
7482
7483 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7484 if (AllDivisorsAreOnes)
7485 return SDValue();
7486
7487 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7488 // since it can be best implemented as a bit test.
7489 if (AllDivisorsArePowerOfTwo)
7490 return SDValue();
7491
7492 SDValue PVal, AVal, KVal, QVal;
7493 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7494 if (HadOneDivisor) {
7495 // Try to turn PAmts into a splat, since we don't care about the values
7496 // that are currently '0'. If we can't, just keep '0'`s.
7498 // Try to turn AAmts into a splat, since we don't care about the
7499 // values that are currently '-1'. If we can't, change them to '0'`s.
7501 DAG.getConstant(0, DL, SVT));
7502 // Try to turn KAmts into a splat, since we don't care about the values
7503 // that are currently '-1'. If we can't, change them to '0'`s.
7505 DAG.getConstant(0, DL, ShSVT));
7506 }
7507
7508 PVal = DAG.getBuildVector(VT, DL, PAmts);
7509 AVal = DAG.getBuildVector(VT, DL, AAmts);
7510 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7511 QVal = DAG.getBuildVector(VT, DL, QAmts);
7512 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7513 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7514 QAmts.size() == 1 &&
7515 "Expected matchUnaryPredicate to return one element for scalable "
7516 "vectors");
7517 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7518 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7519 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7520 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7521 } else {
7522 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7523 PVal = PAmts[0];
7524 AVal = AAmts[0];
7525 KVal = KAmts[0];
7526 QVal = QAmts[0];
7527 }
7528
7529 // (mul N, P)
7530 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7531 Created.push_back(Op0.getNode());
7532
7533 // We need ADD to do this.
7534 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7535 return SDValue();
7536
7537 // (add (mul N, P), A)
7538 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7539 Created.push_back(Op0.getNode());
7540
7541 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7542 // divisors as a performance improvement, since rotating by 0 is a no-op.
7543 if (HadEvenDivisor) {
7544 // We need ROTR to do this.
7545 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7546 return SDValue();
7547 // SREM: (rotr (add (mul N, P), A), K)
7548 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7549 Created.push_back(Op0.getNode());
7550 }
7551
7552 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7553 return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7555}
7556
7558 const DenormalMode &Mode,
7559 SDNodeFlags Flags) const {
7560 SDLoc DL(Op);
7561 EVT VT = Op.getValueType();
7562 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7563 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7564
7565 // This is specifically a check for the handling of denormal inputs, not the
7566 // result.
7567 if (Mode.Input == DenormalMode::PreserveSign ||
7568 Mode.Input == DenormalMode::PositiveZero) {
7569 // Test = X == 0.0
7570 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
7571 /*Signaling=*/false, Flags);
7572 }
7573
7574 // Testing it with denormal inputs to avoid wrong estimate.
7575 //
7576 // Test = fabs(X) < SmallestNormal
7577 const fltSemantics &FltSem = VT.getFltSemantics();
7578 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7579 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7580 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op, Flags);
7581 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT, /*Chain=*/{},
7582 /*Signaling=*/false, Flags);
7583}
7584
7586 bool LegalOps, bool OptForSize,
7588 unsigned Depth) const {
7589 // fneg is removable even if it has multiple uses.
7590 if (Op.getOpcode() == ISD::FNEG) {
7592 return Op.getOperand(0);
7593 }
7594
7595 // Don't recurse exponentially.
7597 return SDValue();
7598
7599 // Pre-increment recursion depth for use in recursive calls.
7600 ++Depth;
7601 const SDNodeFlags Flags = Op->getFlags();
7602 EVT VT = Op.getValueType();
7603 unsigned Opcode = Op.getOpcode();
7604
7605 // Don't allow anything with multiple uses unless we know it is free.
7606 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7607 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7608 isFPExtFree(VT, Op.getOperand(0).getValueType());
7609 if (!IsFreeExtend)
7610 return SDValue();
7611 }
7612
7613 auto RemoveDeadNode = [&](SDValue N) {
7614 if (N && N.getNode()->use_empty())
7615 DAG.RemoveDeadNode(N.getNode());
7616 };
7617
7618 SDLoc DL(Op);
7619
7620 // Because getNegatedExpression can delete nodes we need a handle to keep
7621 // temporary nodes alive in case the recursion manages to create an identical
7622 // node.
7623 std::list<HandleSDNode> Handles;
7624
7625 switch (Opcode) {
7626 case ISD::ConstantFP: {
7627 // Don't invert constant FP values after legalization unless the target says
7628 // the negated constant is legal.
7629 bool IsOpLegal =
7631 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7632 OptForSize);
7633
7634 if (LegalOps && !IsOpLegal)
7635 break;
7636
7637 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7638 V.changeSign();
7639 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7640
7641 // If we already have the use of the negated floating constant, it is free
7642 // to negate it even it has multiple uses.
7643 if (!Op.hasOneUse() && CFP.use_empty())
7644 break;
7646 return CFP;
7647 }
7648 case ISD::SPLAT_VECTOR: {
7649 // fold splat_vector(fneg(X)) -> splat_vector(-X)
7650 SDValue X = Op.getOperand(0);
7652 break;
7653
7654 SDValue NegX = getCheaperNegatedExpression(X, DAG, LegalOps, OptForSize);
7655 if (!NegX)
7656 break;
7658 return DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, NegX);
7659 }
7660 case ISD::BUILD_VECTOR: {
7661 // Only permit BUILD_VECTOR of constants.
7662 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7663 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7664 }))
7665 break;
7666
7667 bool IsOpLegal =
7670 llvm::all_of(Op->op_values(), [&](SDValue N) {
7671 return N.isUndef() ||
7672 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7673 OptForSize);
7674 });
7675
7676 if (LegalOps && !IsOpLegal)
7677 break;
7678
7680 for (SDValue C : Op->op_values()) {
7681 if (C.isUndef()) {
7682 Ops.push_back(C);
7683 continue;
7684 }
7685 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7686 V.changeSign();
7687 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7688 }
7690 return DAG.getBuildVector(VT, DL, Ops);
7691 }
7692 case ISD::FADD: {
7693 if (!Flags.hasNoSignedZeros())
7694 break;
7695
7696 // After operation legalization, it might not be legal to create new FSUBs.
7697 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7698 break;
7699 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7700
7701 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7703 SDValue NegX =
7704 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7705 // Prevent this node from being deleted by the next call.
7706 if (NegX)
7707 Handles.emplace_back(NegX);
7708
7709 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7711 SDValue NegY =
7712 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7713
7714 // We're done with the handles.
7715 Handles.clear();
7716
7717 // Negate the X if its cost is less or equal than Y.
7718 if (NegX && (CostX <= CostY)) {
7719 Cost = CostX;
7720 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7721 if (NegY != N)
7722 RemoveDeadNode(NegY);
7723 return N;
7724 }
7725
7726 // Negate the Y if it is not expensive.
7727 if (NegY) {
7728 Cost = CostY;
7729 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7730 if (NegX != N)
7731 RemoveDeadNode(NegX);
7732 return N;
7733 }
7734 break;
7735 }
7736 case ISD::FSUB: {
7737 // We can't turn -(A-B) into B-A when we honor signed zeros.
7738 if (!Flags.hasNoSignedZeros())
7739 break;
7740
7741 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7742 // fold (fneg (fsub 0, Y)) -> Y
7743 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7744 if (C->isZero()) {
7746 return Y;
7747 }
7748
7749 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7751 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7752 }
7753 case ISD::FMUL:
7754 case ISD::FDIV: {
7755 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7756
7757 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7759 SDValue NegX =
7760 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7761 // Prevent this node from being deleted by the next call.
7762 if (NegX)
7763 Handles.emplace_back(NegX);
7764
7765 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7767 SDValue NegY =
7768 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7769
7770 // We're done with the handles.
7771 Handles.clear();
7772
7773 // Negate the X if its cost is less or equal than Y.
7774 if (NegX && (CostX <= CostY)) {
7775 Cost = CostX;
7776 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7777 if (NegY != N)
7778 RemoveDeadNode(NegY);
7779 return N;
7780 }
7781
7782 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7783 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7784 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7785 break;
7786
7787 // Negate the Y if it is not expensive.
7788 if (NegY) {
7789 Cost = CostY;
7790 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7791 if (NegX != N)
7792 RemoveDeadNode(NegX);
7793 return N;
7794 }
7795 break;
7796 }
7797 case ISD::FMA:
7798 case ISD::FMULADD:
7799 case ISD::FMAD: {
7800 if (!Flags.hasNoSignedZeros())
7801 break;
7802
7803 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7805 SDValue NegZ =
7806 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7807 // Give up if fail to negate the Z.
7808 if (!NegZ)
7809 break;
7810
7811 // Prevent this node from being deleted by the next two calls.
7812 Handles.emplace_back(NegZ);
7813
7814 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7816 SDValue NegX =
7817 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7818 // Prevent this node from being deleted by the next call.
7819 if (NegX)
7820 Handles.emplace_back(NegX);
7821
7822 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7824 SDValue NegY =
7825 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7826
7827 // We're done with the handles.
7828 Handles.clear();
7829
7830 // Negate the X if its cost is less or equal than Y.
7831 if (NegX && (CostX <= CostY)) {
7832 Cost = std::min(CostX, CostZ);
7833 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7834 if (NegY != N)
7835 RemoveDeadNode(NegY);
7836 return N;
7837 }
7838
7839 // Negate the Y if it is not expensive.
7840 if (NegY) {
7841 Cost = std::min(CostY, CostZ);
7842 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7843 if (NegX != N)
7844 RemoveDeadNode(NegX);
7845 return N;
7846 }
7847 break;
7848 }
7849
7850 case ISD::FP_EXTEND:
7851 case ISD::FSIN:
7852 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7853 OptForSize, Cost, Depth))
7854 return DAG.getNode(Opcode, DL, VT, NegV);
7855 break;
7856 case ISD::FP_ROUND:
7857 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7858 OptForSize, Cost, Depth))
7859 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7860 break;
7861 case ISD::SELECT:
7862 case ISD::VSELECT: {
7863 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7864 // iff at least one cost is cheaper and the other is neutral/cheaper
7865 SDValue LHS = Op.getOperand(1);
7867 SDValue NegLHS =
7868 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7869 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7870 RemoveDeadNode(NegLHS);
7871 break;
7872 }
7873
7874 // Prevent this node from being deleted by the next call.
7875 Handles.emplace_back(NegLHS);
7876
7877 SDValue RHS = Op.getOperand(2);
7879 SDValue NegRHS =
7880 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7881
7882 // We're done with the handles.
7883 Handles.clear();
7884
7885 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7886 (CostLHS != NegatibleCost::Cheaper &&
7887 CostRHS != NegatibleCost::Cheaper)) {
7888 RemoveDeadNode(NegLHS);
7889 RemoveDeadNode(NegRHS);
7890 break;
7891 }
7892
7893 Cost = std::min(CostLHS, CostRHS);
7894 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7895 }
7896 }
7897
7898 return SDValue();
7899}
7900
7901//===----------------------------------------------------------------------===//
7902// Legalization Utilities
7903//===----------------------------------------------------------------------===//
7904
7905bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7906 SDValue LHS, SDValue RHS,
7908 EVT HiLoVT, SelectionDAG &DAG,
7909 MulExpansionKind Kind, SDValue LL,
7910 SDValue LH, SDValue RL, SDValue RH) const {
7911 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7912 Opcode == ISD::SMUL_LOHI);
7913
7914 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7916 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7918 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7920 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7922
7923 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7924 return false;
7925
7926 unsigned OuterBitSize = VT.getScalarSizeInBits();
7927 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7928
7929 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7930 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7931 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7932
7933 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7934 bool Signed) -> bool {
7935 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7936 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7937 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7938 Hi = Lo.getValue(1);
7939 return true;
7940 }
7941 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7942 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7943 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7944 return true;
7945 }
7946 return false;
7947 };
7948
7949 SDValue Lo, Hi;
7950
7951 if (!LL.getNode() && !RL.getNode() &&
7953 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7954 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7955 }
7956
7957 if (!LL.getNode())
7958 return false;
7959
7960 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7961 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7962 DAG.MaskedValueIsZero(RHS, HighMask)) {
7963 // The inputs are both zero-extended.
7964 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7965 Result.push_back(Lo);
7966 Result.push_back(Hi);
7967 if (Opcode != ISD::MUL) {
7968 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7969 Result.push_back(Zero);
7970 Result.push_back(Zero);
7971 }
7972 return true;
7973 }
7974 }
7975
7976 if (!VT.isVector() && Opcode == ISD::MUL &&
7977 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7978 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7979 // The input values are both sign-extended.
7980 // TODO non-MUL case?
7981 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7982 Result.push_back(Lo);
7983 Result.push_back(Hi);
7984 return true;
7985 }
7986 }
7987
7988 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7989 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7990
7991 if (!LH.getNode() && !RH.getNode() &&
7994 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7995 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7996 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7997 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7998 }
7999
8000 if (!LH.getNode())
8001 return false;
8002
8003 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
8004 return false;
8005
8006 Result.push_back(Lo);
8007
8008 if (Opcode == ISD::MUL) {
8009 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
8010 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
8011 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
8012 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
8013 Result.push_back(Hi);
8014 return true;
8015 }
8016
8017 // Compute the full width result.
8018 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
8019 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
8020 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8021 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
8022 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
8023 };
8024
8025 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
8026 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
8027 return false;
8028
8029 // This is effectively the add part of a multiply-add of half-sized operands,
8030 // so it cannot overflow.
8031 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8032
8033 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
8034 return false;
8035
8036 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
8037 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8038
8039 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
8041 if (UseGlue)
8042 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
8043 Merge(Lo, Hi));
8044 else
8045 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
8046 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
8047
8048 SDValue Carry = Next.getValue(1);
8049 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8050 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8051
8052 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
8053 return false;
8054
8055 if (UseGlue)
8056 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
8057 Carry);
8058 else
8059 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
8060 Zero, Carry);
8061
8062 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
8063
8064 if (Opcode == ISD::SMUL_LOHI) {
8065 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8066 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
8067 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
8068
8069 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
8070 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
8071 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
8072 }
8073
8074 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8075 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
8076 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
8077 return true;
8078}
8079
8081 SelectionDAG &DAG, MulExpansionKind Kind,
8082 SDValue LL, SDValue LH, SDValue RL,
8083 SDValue RH) const {
8085 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
8086 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
8087 DAG, Kind, LL, LH, RL, RH);
8088 if (Ok) {
8089 assert(Result.size() == 2);
8090 Lo = Result[0];
8091 Hi = Result[1];
8092 }
8093 return Ok;
8094}
8095
8096// Optimize unsigned division or remainder by constants for types twice as large
8097// as a legal VT.
8098//
8099// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
8100// can be computed as:
8101// Carry = __builtin_uadd_overflow(Lo, High, &Sum);
8102// Sum += Carry;
8103// Remainder = Sum % Constant;
8104//
8105// If (1 << (BitWidth / 2)) % Constant != 1, we can search for a smaller value
8106// W < (BitWidth / 2) such that (1 << W) % Constant == 1. We can then break
8107// High:Low into chunks of W bits (3 in this example) and compute remainder as
8108// Sum = Chunk0 + Chunk1 + Chunk2;
8109// Remainder = Sum % Constant;
8110//
8111// This is based on "Remainder by Summing Digits" from Hacker's Delight.
8112//
8113// For division, we can compute the remainder using the algorithm described
8114// above, subtract it from the dividend to get an exact multiple of Constant.
8115// Then multiply that exact multiple by the multiplicative inverse modulo
8116// (1 << BitWidth) to get the quotient.
8117//
8118// If Constant is even, we can shift right the dividend and the divisor by the
8119// number of trailing zeros in Constant before applying the remainder algorithm.
8120// If we're after the quotient, we subtract that remainder from the shifted
8121// dividend and multiply by the multiplicative inverse of the shifted divisor.
8122// If we want the remainder, we shift it left by the number of trailing
8123// zeros and add the bits that were shifted out of the dividend.
//
// N supplies the opcode and, when LL/LH are null, the dividend (operand 0).
// Divisor is taken by value because its trailing zeros are stripped locally.
// LL/LH are the optional pre-split low/high halves of the dividend; either both
// or neither must be provided. On success the function returns true and appends
// to Result, in this order: the quotient halves (low, high) unless the opcode
// is UREM, then the remainder halves (low, high) unless the opcode is UDIV.
// Both `return false` exits happen before any DAG node is created.
8124bool TargetLowering::expandUDIVREMByConstantViaUREMDecomposition(
8125 SDNode *N, APInt Divisor, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
8126 SelectionDAG &DAG, SDValue LL, SDValue LH) const {
     // NOTE(review): listing lines 8132, 8251, 8262, 8366 and 8378 are absent
     // from this rendering, so the statements around those gaps read as
     // truncated. Confirm the missing text against the real file.
8127 unsigned Opcode = N->getOpcode();
8128 EVT VT = N->getValueType(0);
8129
     // BitWidth is the width of the divisor constant; HBitWidth is the width of
     // one half and must equal the scalar size of HiLoVT.
8130 unsigned BitWidth = Divisor.getBitWidth();
8131 unsigned HBitWidth = BitWidth / 2;
8133 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
8134
8135 // If the divisor is even, shift it until it becomes odd.
8136 unsigned TrailingZeros = 0;
8137 if (!Divisor[0]) {
8138 TrailingZeros = Divisor.countr_zero();
8139 Divisor.lshrInPlace(TrailingZeros);
8140 }
8141
8142 // After removing trailing zeros, the divisor needs to be less than
8143 // (1 << HBitWidth).
8144 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
8145 if (Divisor.uge(HalfMaxPlus1))
8146 return false;
8147
8148 // Look for the largest chunk width W such that (1 << W) % Divisor == 1 or
8149 // (1 << W) % Divisor == -1.
     // Candidates run from HBitWidth down to HBitWidth / 2 + 1.
8150 unsigned BestChunkWidth = 0, AltChunkWidth = 0;
8151 for (unsigned I = HBitWidth, E = HBitWidth / 2; I > E; --I) {
8152 // Skip HBitWidth-1, it doesn't have enough bits for carries.
8153 if (I == HBitWidth - 1)
8154 continue;
8155
8156 APInt Mod = APInt::getOneBitSet(Divisor.getBitWidth(), I).urem(Divisor);
8157
8158 if (Mod.isOne()) {
8159 BestChunkWidth = I;
8160 break;
8161 }
8162
8163 // We have an alternate strategy for Remainder == Divisor - 1.
8164 // FIXME: Support HBitWidth.
     // No break here: a width with remainder 1 found later in the scan still
     // takes priority over the alternate width.
8165 if (I != HBitWidth && Mod == Divisor - 1)
8166 AltChunkWidth = I;
8167 }
8168
     // Fall back to the alternate (alternating add/subtract) strategy only when
     // no width with remainder 1 exists; give up if neither was found.
8169 bool Alternate = false;
8170 if (!BestChunkWidth) {
8171 if (!AltChunkWidth)
8172 return false;
8173 Alternate = true;
8174 BestChunkWidth = AltChunkWidth;
8175 }
8176
8177 SDLoc dl(N);
8178
8179 assert(!LL == !LH && "Expected both input halves or no input halves!");
8180 if (!LL)
8181 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
8182
8183 bool HasFSHR = isOperationLegal(ISD::FSHR, HiLoVT);
8184
     // Returns the low HBitWidth bits of (Hi:Lo >> ShiftAmt), using FSHR when it
     // is legal on HiLoVT and an explicit SRL/SHL/OR sequence otherwise.
8185 auto GetFSHR = [&](SDValue Lo, SDValue Hi, unsigned ShiftAmt) {
8186 assert(ShiftAmt > 0 && ShiftAmt < HBitWidth);
8187 if (HasFSHR)
8188 return DAG.getNode(ISD::FSHR, dl, HiLoVT, Hi, Lo,
8189 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8190 return DAG.getNode(
8191 ISD::OR, dl, HiLoVT,
8192 DAG.getNode(ISD::SRL, dl, HiLoVT, Lo,
8193 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl)),
8194 DAG.getNode(
8195 ISD::SHL, dl, HiLoVT, Hi,
8196 DAG.getShiftAmountConstant(HBitWidth - ShiftAmt, HiLoVT, dl)));
8197 };
8198
8199 // Helper to perform a right shift on a BitWidth-bit value split into two
8200 // halves, updating Lo and Hi in place. Handles shifts >= HBitWidth by moving
     // Hi to Lo and zeroing Hi.
8201 auto ShiftRight = [&](SDValue &Lo, SDValue &Hi, unsigned ShiftAmt) {
8202 if (ShiftAmt == 0)
8203 return;
8204 if (ShiftAmt < HBitWidth) {
8205 Lo = GetFSHR(Lo, Hi, ShiftAmt);
8206 Hi = DAG.getNode(ISD::SRL, dl, HiLoVT, Hi,
8207 DAG.getShiftAmountConstant(ShiftAmt, HiLoVT, dl));
8208 } else if (ShiftAmt == HBitWidth) {
8209 Lo = Hi;
8210 Hi = DAG.getConstant(0, dl, HiLoVT);
8211 } else {
8212 Lo = DAG.getNode(
8213 ISD::SRL, dl, HiLoVT, Hi,
8214 DAG.getShiftAmountConstant(ShiftAmt - HBitWidth, HiLoVT, dl));
8215 Hi = DAG.getConstant(0, dl, HiLoVT);
8216 }
8217 };
8218
8219 // Shift the input by the number of TrailingZeros in the divisor. The
8220 // shifted out bits will be added to the remainder later.
8221 SDValue PartialRemL, PartialRemH;
8222 if (TrailingZeros && Opcode != ISD::UDIV) {
8223 // Save the shifted off bits if we need the remainder.
8224 if (TrailingZeros < HBitWidth) {
8225 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
8226 PartialRemL = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
8227 DAG.getConstant(Mask, dl, HiLoVT));
8228 } else if (TrailingZeros == HBitWidth) {
8229 // All of LL is part of the remainder.
8230 PartialRemL = LL;
8231 } else {
8232 // TrailingZeros > HBitWidth: LL and part of LH are the remainder.
8233 PartialRemL = LL;
8234 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros - HBitWidth);
8235 PartialRemH = DAG.getNode(ISD::AND, dl, HiLoVT, LH,
8236 DAG.getConstant(Mask, dl, HiLoVT));
8237 }
8238 }
8239
8240 SDValue Sum;
8241 // If BestChunkWidth is HBitWidth add low and high half. If there is a carry
8242 // out, add that to the final sum.
8243 if (BestChunkWidth == HBitWidth) {
8244 assert(!Alternate);
8245 // Shift LH:LL right if there were trailing zeros in the divisor.
8246 ShiftRight(LL, LH, TrailingZeros);
8247
8248 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
8249 EVT SetCCType =
8250 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
8252 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
8253 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
8254 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
8255 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
8256 } else {
     // Compare-based fallback: the add wrapped iff the sum is below LL.
8257 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
8258 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
8259 // If the boolean for the target is 0 or 1, we can add the setcc result
8260 // directly.
8261 if (getBooleanContents(HiLoVT) ==
8263 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
8264 else
8265 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
8266 DAG.getConstant(0, dl, HiLoVT));
8267 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
8268 }
8269 } else {
8270 // Otherwise split into multiple chunks and add them together. We chose
8271 // BestChunkWidth so that the sum will not overflow.
8272 SDValue Mask = DAG.getConstant(
8273 APInt::getLowBitsSet(HBitWidth, BestChunkWidth), dl, HiLoVT);
8274
     // I is the chunk's bit offset within the (conceptually) shifted dividend.
8275 for (unsigned I = 0; I < BitWidth - TrailingZeros; I += BestChunkWidth) {
8276 // If there were trailing zeros in the divisor, increase the shift amount.
8277 unsigned Shift = I + TrailingZeros;
8278 SDValue Chunk;
8279 if (Shift == 0)
8280 Chunk = LL;
8281 else if (Shift >= HBitWidth)
8282 Chunk = DAG.getNode(
8283 ISD::SRL, dl, HiLoVT, LH,
8284 DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, dl));
8285 else
8286 Chunk = GetFSHR(LL, LH, Shift);
8287 // If we're on the last chunk, we don't need an AND.
8288 if (I + BestChunkWidth < BitWidth - TrailingZeros)
8289 Chunk = DAG.getNode(ISD::AND, dl, HiLoVT, Chunk, Mask);
8290 if (!Sum) {
8291 Sum = Chunk;
8292 } else {
8293 // For Alternate, we need to subtract odd chunks.
8294 unsigned ChunkNum = I / BestChunkWidth;
8295 unsigned Opc = (Alternate && (ChunkNum % 2) != 0) ? ISD::SUB : ISD::ADD;
8296 Sum = DAG.getNode(Opc, dl, HiLoVT, Sum, Chunk);
8297 }
8298 }
8299
8300 // For Alternate, the sum may be negative, but we need a positive sum. We
8301 // can increase it by a multiple of the divisor to make it positive. For 3
8302 // chunks the largest negative value is -(2^BestChunkWidth - 1). For 4
8303 // chunks, it's 2*-(2^BestChunkWidth - 1). We know that 2^BestChunkWidth + 1
8304 // is a multiple of the divisor. Add that 1 or 2 times to make the sum
8305 // positive.
8306 if (Alternate) {
8307 unsigned NumChunks = divideCeil(BitWidth - TrailingZeros, BestChunkWidth);
8308 assert(NumChunks <= 4);
8309
     // Adjust = 2^BestChunkWidth + 1, doubled when there are four chunks.
8310 APInt Adjust = APInt::getOneBitSet(HBitWidth, BestChunkWidth);
8311 Adjust.setBit(0);
8312 // If there are 4 chunks, we need to adjust twice.
8313 if (NumChunks == 4)
8314 Adjust <<= 1;
8315 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum,
8316 DAG.getConstant(Adjust, dl, HiLoVT));
8317 }
8318 }
8319
8320 // Perform a HiLoVT urem on the Sum using truncated divisor.
     // The remainder fits in the low half, so the high half starts as zero.
8321 SDValue RemL =
8322 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
8323 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
8324 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
8325
8326 if (Opcode != ISD::UREM) {
8327 // If we didn't shift LL/LH earlier, do it now.
8328 if (BestChunkWidth != HBitWidth)
8329 ShiftRight(LL, LH, TrailingZeros);
8330
8331 // Subtract the remainder from the shifted dividend.
8332 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
8333 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
8334
8335 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
8336
8337 // Multiply by the multiplicative inverse of the divisor modulo
8338 // (1 << BitWidth).
8339 APInt MulFactor = Divisor.multiplicativeInverse();
8340
8341 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
8342 DAG.getConstant(MulFactor, dl, VT));
8343
8344 // Split the quotient into low and high parts.
8345 SDValue QuotL, QuotH;
8346 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
8347 Result.push_back(QuotL);
8348 Result.push_back(QuotH);
8349 }
8350
8351 if (Opcode != ISD::UDIV) {
8352 // If we shifted the input, shift the remainder left and add the bits we
8353 // shifted off the input.
8354 if (TrailingZeros) {
8355 if (TrailingZeros < HBitWidth) {
8356 // Shift RemH:RemL left by TrailingZeros.
8357 // RemH gets the high bits shifted out of RemL.
8358 RemH = DAG.getNode(
8359 ISD::SRL, dl, HiLoVT, RemL,
8360 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros, HiLoVT, dl));
8361 RemL =
8362 DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
8363 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
8364 // OR in the partial remainder.
8365 RemL = DAG.getNode(ISD::OR, dl, HiLoVT, RemL, PartialRemL,
8367 } else if (TrailingZeros == HBitWidth) {
8368 // Shift left by exactly HBitWidth: RemH becomes RemL, RemL becomes
8369 // PartialRemL.
8370 RemH = RemL;
8371 RemL = PartialRemL;
8372 } else {
8373 // Shift left by more than HBitWidth.
8374 RemH = DAG.getNode(
8375 ISD::SHL, dl, HiLoVT, RemL,
8376 DAG.getShiftAmountConstant(TrailingZeros - HBitWidth, HiLoVT, dl));
8377 RemH = DAG.getNode(ISD::OR, dl, HiLoVT, RemH, PartialRemH,
8379 RemL = PartialRemL;
8380 }
8381 }
8382 Result.push_back(RemL);
8383 Result.push_back(RemH);
8384 }
8385
8386 return true;
8387}
8388
// Expand an unsigned division and/or remainder by the constant Divisor on a
// value split into HiLoVT halves, using a "magic number" multiply: the quotient
// is taken from the upper half of (dividend * Magic), with optional pre/post
// shifts and an add fix-up; the remainder is dividend - quotient * Divisor.
//
// N supplies the opcode and the dividend (operand 0). LL/LH are the optional
// pre-split low/high halves of the dividend; either both or neither must be
// provided. On success the function returns true and appends to Result, in this
// order: the quotient halves (low, high) unless the opcode is UREM, then the
// remainder halves (low, high) unless the opcode is UDIV. It returns false when
// one of the long multiplies cannot be expanded.
8389bool TargetLowering::expandUDIVREMByConstantViaUMulHiMagic(
8390 SDNode *N, const APInt &Divisor, SmallVectorImpl<SDValue> &Result,
8391 EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH) const {
     // NOTE(review): listing lines 8407, 8415 and 8420 are absent from this
     // rendering, so the three helper lambdas below read as truncated. Confirm
     // the missing text against the real file.
8392
8393 SDValue N0 = N->getOperand(0);
8394 EVT VT = N0->getValueType(0);
8395 SDLoc DL{N};
8396
8397 assert(!Divisor.isOne() && "Magic algorithm does not work for division by 1");
8398
8399 // This helper creates a MUL_LOHI of the pair (LL, LH) by a constant.
     // The expanded pieces are appended to the Result argument of the lambda
     // (which shadows the function's Result); the return value is the success
     // flag of expandMUL_LOHI.
8400 auto MakeMUL_LOHIByConst = [&](unsigned Opc, SDValue LL, SDValue LH,
8401 const APInt &Const,
8402 SmallVectorImpl<SDValue> &Result) {
8403 SDValue LHS = DAG.getNode(ISD::BUILD_PAIR, DL, VT, LL, LH);
8404 SDValue RHS = DAG.getConstant(Const, DL, VT);
8405 auto [RL, RH] = DAG.SplitScalar(RHS, DL, HiLoVT, HiLoVT);
8406 return expandMUL_LOHI(Opc, VT, DL, LHS, RHS, Result, HiLoVT, DAG,
8408 LL, LH, RL, RH);
8409 };
8410
8411 // This helper creates an ADD/SUB of the pairs (LL, LH) and (RL, RH).
     // The low halves are combined first and the resulting overflow value is
     // fed into the operation on the high halves; returns {low, high}.
8412 auto MakeAddSubLong = [&](unsigned Opc, SDValue LL, SDValue LH, SDValue RL,
8413 SDValue RH) {
8414 SDValue AddSubNode =
8416 DAG.getVTList(HiLoVT, MVT::i1), LL, RL);
8417 SDValue OutL = AddSubNode.getValue(0);
8418 SDValue Overflow = AddSubNode.getValue(1);
8419 SDValue AddSubWithOverflow =
8421 DAG.getVTList(HiLoVT, MVT::i1), LH, RH, Overflow);
8422 SDValue OutH = AddSubWithOverflow.getValue(0);
8423 return std::make_pair(OutL, OutH);
8424 };
8425
8426 // This helper creates a SRL of the pair (LL, LH) by Shift.
     // Three cases: Shift < HBitWidth (funnel shift for the low half), Shift ==
     // HBitWidth (high half moves down), Shift > HBitWidth (shift the high half
     // by the excess). In the last two cases the new high half is zero.
8427 auto MakeSRLLong = [&](SDValue LL, SDValue LH, unsigned Shift) {
8428 unsigned HBitWidth = HiLoVT.getScalarSizeInBits();
8429 if (Shift < HBitWidth) {
8430 SDValue ShAmt = DAG.getShiftAmountConstant(Shift, HiLoVT, DL);
8431 SDValue ResL = DAG.getNode(ISD::FSHR, DL, HiLoVT, LH, LL, ShAmt);
8432 SDValue ResH = DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt);
8433 return std::make_pair(ResL, ResH);
8434 }
8435 SDValue Zero = DAG.getConstant(0, DL, HiLoVT);
8436 if (Shift == HBitWidth)
8437 return std::make_pair(LH, Zero);
8438 assert(Shift - HBitWidth < HBitWidth &&
8439 "We shouldn't generate an undefined shift");
8440 SDValue ShAmt = DAG.getShiftAmountConstant(Shift - HBitWidth, HiLoVT, DL);
8441 return std::make_pair(DAG.getNode(ISD::SRL, DL, HiLoVT, LH, ShAmt), Zero);
8442 };
8443
8444 // Knowledge of leading zeros may help to reduce the multiplier.
8445 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
8446
8447 UnsignedDivisionByConstantInfo Magics = UnsignedDivisionByConstantInfo::get(
8448 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
8449
8450 assert(!LL == !LH && "Expected both input halves or no input halves!");
8451 if (!LL)
8452 std::tie(LL, LH) = DAG.SplitScalar(N0, DL, HiLoVT, HiLoVT);
     // QL/QH track the quotient under construction; LL/LH keep the original
     // dividend for the fix-up and the remainder computation below.
8453 SDValue QL = LL;
8454 SDValue QH = LH;
8455 if (Magics.PreShift != 0)
8456 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PreShift);
8457
8458 SmallVector<SDValue, 4> UMulResult;
8459 if (!MakeMUL_LOHIByConst(ISD::UMUL_LOHI, QL, QH, Magics.Magic, UMulResult))
8460 return false;
8461
     // Keep pieces 2 and 3 of the four-piece product (presumably its upper
     // half, assuming pieces are ordered least-significant first -- confirm
     // against expandMUL_LOHI).
8462 QL = UMulResult[2];
8463 QH = UMulResult[3];
8464
     // Add fix-up: Q = (((N - Q) >> 1) + Q), computed on the split halves.
8465 if (Magics.IsAdd) {
8466 auto [NPQL, NPQH] = MakeAddSubLong(ISD::SUB, LL, LH, QL, QH);
8467 std::tie(NPQL, NPQH) = MakeSRLLong(NPQL, NPQH, 1);
8468 std::tie(QL, QH) = MakeAddSubLong(ISD::ADD, NPQL, NPQH, QL, QH);
8469 }
8470
8471 if (Magics.PostShift != 0)
8472 std::tie(QL, QH) = MakeSRLLong(QL, QH, Magics.PostShift);
8473
8474 unsigned Opcode = N->getOpcode();
8475 if (Opcode != ISD::UREM) {
8476 Result.push_back(QL);
8477 Result.push_back(QH);
8478 }
8479
8480 if (Opcode != ISD::UDIV) {
     // Remainder = N - Q * Divisor.
     // NOTE(review): when the opcode is neither UREM nor UDIV, a failure of
     // this multiply returns false after QL/QH were already appended to
     // Result; verify that callers ignore Result on failure.
8481 SmallVector<SDValue, 2> MulResult;
8482 if (!MakeMUL_LOHIByConst(ISD::MUL, QL, QH, Divisor, MulResult))
8483 return false;
8484
8485 assert(MulResult.size() == 2);
8486
8487 auto [RemL, RemH] =
8488 MakeAddSubLong(ISD::SUB, LL, LH, MulResult[0], MulResult[1]);
8489
8490 Result.push_back(RemL);
8491 Result.push_back(RemH);
8492 }
8493
8494 return true;
8495}
8496
8499 EVT HiLoVT, SelectionDAG &DAG,
8500 SDValue LL, SDValue LH) const {
     // NOTE(review): listing lines 8497-8498 (start of the signature) and 8519
     // (second half of the MULHU legality condition) are absent from this
     // rendering. Confirm the missing text against the real file.
     //
     // Entry point for expanding a division/remainder by a constant on a value
     // split into HiLoVT halves. Only the unsigned opcodes are handled. After
     // the early-outs below, the URem-decomposition strategy is tried first and
     // the multiply-by-magic strategy second; true is returned as soon as one
     // of them succeeds.
8501 unsigned Opcode = N->getOpcode();
8502
8503 // TODO: Support signed division/remainder.
8504 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
8505 return false;
8506 assert(
8507 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
8508 "Unexpected opcode");
8509
     // The divisor (operand 1) must be a constant node.
8510 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
8511 if (!CN)
8512 return false;
8513
8514 APInt Divisor = CN->getAPIntValue();
8515
8516 // We depend on the UREM by constant optimization in DAGCombiner that requires
8517 // high multiply.
8518 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
8520 return false;
8521
8522 // Don't expand if optimizing for size.
8523 if (DAG.shouldOptForSize())
8524 return false;
8525
8526 // Early out for 0 or 1 divisors.
8527 if (Divisor.ule(1))
8528 return false;
8529
8530 if (expandUDIVREMByConstantViaUREMDecomposition(N, Divisor, Result, HiLoVT,
8531 DAG, LL, LH))
8532 return true;
8533
8534 if (expandUDIVREMByConstantViaUMulHiMagic(N, Divisor, Result, HiLoVT, DAG, LL,
8535 LH))
8536 return true;
8537
8538 return false;
8539}
8540
8541// Check that (every element of) Z is undef or not an exact multiple of BW.
// NOTE(review): listing line 8543 (the head of the returned call expression)
// is absent from this rendering; confirm which matcher is being invoked.
8542static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
     // The predicate accepts a null constant pointer (presumably how undef
     // elements are reported when AllowUndefs is set -- confirm) and any
     // constant whose value is not divisible by BW.
8544 Z,
8545 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
8546 /*AllowUndefs=*/true, /*AllowTruncation=*/true);
8547}
8548
     // NOTE(review): listing line 8549 (the function signature) is absent from
     // this rendering; confirm it against the real file.
     //
     // Expand a vector-predicated funnel shift (VP_FSHL, otherwise treated as
     // the right-shift form) into VP shift/or nodes. Operands are X, Y, the
     // shift amount Z, the mask and the vector length; mask and VL are
     // forwarded unchanged to every VP node created here.
8550 EVT VT = Node->getValueType(0);
8551 SDValue ShX, ShY;
8552 SDValue ShAmt, InvShAmt;
8553 SDValue X = Node->getOperand(0);
8554 SDValue Y = Node->getOperand(1);
8555 SDValue Z = Node->getOperand(2);
8556 SDValue Mask = Node->getOperand(3);
8557 SDValue VL = Node->getOperand(4);
8558
8559 unsigned BW = VT.getScalarSizeInBits();
8560 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
8561 SDLoc DL(SDValue(Node, 0));
8562
8563 EVT ShVT = Z.getValueType();
     // When Z is known not to be a multiple of BW, both shift amounts are in
     // [1, BW-1] and the simple two-shift form is safe. Otherwise one side is
     // shifted in two steps (by 1, then by BW - 1 - Z % BW) so that no single
     // shift amount reaches BW.
8564 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8565 // fshl: X << C | Y >> (BW - C)
8566 // fshr: X << (BW - C) | Y >> C
8567 // where C = Z % BW is not zero
8568 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8569 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8570 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
8571 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
8572 VL);
8573 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
8574 VL);
8575 } else {
8576 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8577 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8578 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
8579 if (isPowerOf2_32(BW)) {
8580 // Z % BW -> Z & (BW - 1)
8581 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
8582 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8583 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
8584 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
8585 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
8586 } else {
8587 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8588 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
8589 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
8590 }
8591
8592 SDValue One = DAG.getConstant(1, DL, ShVT);
8593 if (IsFSHL) {
8594 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
8595 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
8596 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
8597 } else {
8598 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
8599 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
8600 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
8601 }
8602 }
     // The two shifted halves occupy disjoint bit ranges; OR them together.
8603 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
8604}
8605
8607 SelectionDAG &DAG) const {
     // NOTE(review): listing lines 8606 (start of the signature) and 8614-8616
     // (the remaining vector legality checks) are absent from this rendering;
     // confirm the missing text against the real file.
     //
     // Expand a funnel shift (FSHL, otherwise treated as FSHR) of operands
     // X, Y by amount Z. VP opcodes are forwarded to expandVPFunnelShift. An
     // empty SDValue is returned when the vector legality check below fails.
8608 if (Node->isVPOpcode())
8609 return expandVPFunnelShift(Node, DAG);
8610
8611 EVT VT = Node->getValueType(0);
8612
8613 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
8617 return SDValue();
8618
8619 SDValue X = Node->getOperand(0);
8620 SDValue Y = Node->getOperand(1);
8621 SDValue Z = Node->getOperand(2);
8622
8623 unsigned BW = VT.getScalarSizeInBits();
8624 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
8625 SDLoc DL(SDValue(Node, 0));
8626
8627 EVT ShVT = Z.getValueType();
8628
8629 // If a funnel shift in the other direction is more supported, use it.
     // Only done for power-of-two bit widths, where negating or inverting Z
     // gives the complementary amount modulo BW.
8630 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
8631 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8632 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
8633 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8634 // fshl X, Y, Z -> fshr X, Y, -Z
8635 // fshr X, Y, Z -> fshl X, Y, -Z
8636 Z = DAG.getNegative(Z, DL, ShVT);
8637 } else {
8638 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
8639 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
8640 SDValue One = DAG.getConstant(1, DL, ShVT);
8641 if (IsFSHL) {
     // Y must be rebuilt before X is overwritten, since it reads the old X.
8642 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8643 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
8644 } else {
     // X must be rebuilt before Y is overwritten, since it reads the old Y.
8645 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
8646 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
8647 }
8648 Z = DAG.getNOT(DL, Z, ShVT);
8649 }
8650 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
8651 }
8652
     // Generic expansion into plain shifts. When Z may be a multiple of BW, one
     // side is shifted in two steps (by 1, then by BW - 1 - Z % BW) so that no
     // single shift amount reaches BW.
8653 SDValue ShX, ShY;
8654 SDValue ShAmt, InvShAmt;
8655 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
8656 // fshl: X << C | Y >> (BW - C)
8657 // fshr: X << (BW - C) | Y >> C
8658 // where C = Z % BW is not zero
8659 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8660 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8661 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
8662 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
8663 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
8664 } else {
8665 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
8666 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
8667 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
8668 if (isPowerOf2_32(BW)) {
8669 // Z % BW -> Z & (BW - 1)
8670 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
8671 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
8672 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
8673 } else {
8674 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
8675 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
8676 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
8677 }
8678
8679 SDValue One = DAG.getConstant(1, DL, ShVT);
8680 if (IsFSHL) {
8681 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
8682 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
8683 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
8684 } else {
8685 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
8686 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
8687 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
8688 }
8689 }
8690 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
8691}
8692
8693// TODO: Merge with expandFunnelShift.
// NOTE(review): listing lines 8694 (start of the signature) and 8715-8719 (the
// vector legality checks guarded by AllowVectorOps) are absent from this
// rendering; confirm the missing text against the real file.
8695 SelectionDAG &DAG) const {
     // Expand a rotate (ROTL, otherwise treated as a right rotate) of operand 0
     // by operand 1 into shifts and an OR, or into the opposite rotate when
     // that one is better supported. An empty SDValue is returned when the
     // vector check below rejects the type.
8696 EVT VT = Node->getValueType(0);
8697 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8698 bool IsLeft = Node->getOpcode() == ISD::ROTL;
8699 SDValue Op0 = Node->getOperand(0);
8700 SDValue Op1 = Node->getOperand(1);
8701 SDLoc DL(SDValue(Node, 0));
8702
8703 EVT ShVT = Op1.getValueType();
8704 SDValue Zero = DAG.getConstant(0, DL, ShVT);
8705
8706 // If a rotate in the other direction is more supported, use it.
     // Rotating by (0 - amount) in the other direction; only done for
     // power-of-two element sizes. This path is taken before the
     // AllowVectorOps check below.
8707 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
8708 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
8709 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
8710 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8711 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
8712 }
8713
8714 if (!AllowVectorOps && VT.isVector() &&
8720 return SDValue();
8721
     // ShOpc shifts in the rotate direction; HsOpc shifts the other way to
     // bring in the bits that wrap around.
8722 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8723 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8724 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8725 SDValue ShVal;
8726 SDValue HsVal;
8727 if (isPowerOf2_32(EltSizeInBits)) {
8728 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8729 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8730 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8731 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8732 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8733 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8734 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8735 } else {
8736 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8737 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
     // The opposite shift is split in two (by 1, then by w - 1 - c % w) so
     // that no single shift amount reaches w when c % w is zero.
8738 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8739 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8740 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8741 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8742 SDValue One = DAG.getConstant(1, DL, ShVT);
8743 HsVal =
8744 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8745 }
8746 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8747}
8748
8749/// Check if CLMUL on VT can eventually reach a type with legal CLMUL through
8750/// a chain of halving decompositions (halving element width) and/or vector
8751/// widening (doubling element count). This guides expansion strategy selection:
8752/// if true, the halving/widening path produces better code than bit-by-bit.
8753///
8754/// HalveDepth tracks halving steps only (each creates ~4x more operations).
8755/// Widening steps are cheap (O(1) pad/extract) and don't count.
8756/// Limiting halvings to 2 prevents exponential blowup:
8757/// 1 halving: ~4 sub-CLMULs (good, e.g. v8i16 -> v8i8)
8758/// 2 halvings: ~16 sub-CLMULs (acceptable, e.g. v4i32 -> v4i16 -> v8i8)
8759/// 3 halvings: ~64 sub-CLMULs (worse than bit-by-bit expansion)
///
/// TotalDepth counts every recursive step (halving or widening) and is capped
/// at 8. Only fixed-length vector types are considered.
///
/// NOTE(review): listing lines 8760 (start of the signature) and 8765 (the
/// condition guarding the first `return true`, presumably a CLMUL legality
/// check on VT) are absent from this rendering; confirm against the real file.
8761 EVT VT, unsigned HalveDepth = 0,
8762 unsigned TotalDepth = 0) {
     // Recursion limits and type-kind filter.
8763 if (HalveDepth > 2 || TotalDepth > 8 || !VT.isFixedLengthVector())
8764 return false;
8766 return true;
     // An illegal type ends the search along this path.
8767 if (!TLI.isTypeLegal(VT))
8768 return false;
8769
8770 unsigned BW = VT.getScalarSizeInBits();
8771
8772 // Halve: halve element width, same element count.
8773 // This is the expensive step -- each halving creates ~4x more operations.
8774 if (BW % 2 == 0) {
8775 EVT HalfEltVT = EVT::getIntegerVT(Ctx, BW / 2);
8776 EVT HalfVT = VT.changeVectorElementType(Ctx, HalfEltVT);
8777 if (TLI.isTypeLegal(HalfVT) &&
8778 canNarrowCLMULToLegal(TLI, Ctx, HalfVT, HalveDepth + 1, TotalDepth + 1))
8779 return true;
8780 }
8781
8782 // Widen: double element count (fixed-width vectors only).
8783 // This is cheap -- just INSERT_SUBVECTOR + EXTRACT_SUBVECTOR.
     // HalveDepth is passed through unchanged; only TotalDepth advances.
8784 EVT WideVT = VT.getDoubleNumVectorElementsVT(Ctx);
8785 if (TLI.isTypeLegal(WideVT) &&
8786 canNarrowCLMULToLegal(TLI, Ctx, WideVT, HalveDepth, TotalDepth + 1))
8787 return true;
8788
8789 return false;
8790}
8791
8793 SDLoc DL(Node);
8794 EVT VT = Node->getValueType(0);
8795 SDValue X = Node->getOperand(0);
8796 SDValue Y = Node->getOperand(1);
8797 unsigned BW = VT.getScalarSizeInBits();
8798 unsigned Opcode = Node->getOpcode();
8799 LLVMContext &Ctx = *DAG.getContext();
8800
8801 switch (Opcode) {
8802 case ISD::CLMUL: {
8803 // For vector types, try decomposition strategies that leverage legal
8804 // CLMUL on narrower or wider element types, avoiding the expensive
8805 // bit-by-bit expansion.
8806 if (VT.isVector()) {
8807 // Strategy 1: Halving decomposition to half-element-width CLMUL.
8808 // Applies ExpandIntRes_CLMUL's identity element-wise:
8809 // CLMUL(X, Y) = (Hi << HalfBW) | Lo
8810 // where:
8811 // Lo = CLMUL(XLo, YLo)
8812 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8813 unsigned HalfBW = BW / 2;
8814 if (BW % 2 == 0) {
8815 EVT HalfEltVT = EVT::getIntegerVT(Ctx, HalfBW);
8816 EVT HalfVT =
8817 EVT::getVectorVT(Ctx, HalfEltVT, VT.getVectorElementCount());
8818 if (isTypeLegal(HalfVT) && canNarrowCLMULToLegal(*this, Ctx, HalfVT,
8819 /*HalveDepth=*/1)) {
8820 SDValue ShAmt = DAG.getShiftAmountConstant(HalfBW, VT, DL);
8821
8822 // Extract low and high halves of each element.
8823 SDValue XLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, X);
8824 SDValue XHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8825 DAG.getNode(ISD::SRL, DL, VT, X, ShAmt));
8826 SDValue YLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Y);
8827 SDValue YHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT,
8828 DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt));
8829
8830 // Lo = CLMUL(XLo, YLo)
8831 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YLo);
8832
8833 // Hi = CLMULH(XLo, YLo) ^ CLMUL(XLo, YHi) ^ CLMUL(XHi, YLo)
8834 SDValue LoH = DAG.getNode(ISD::CLMULH, DL, HalfVT, XLo, YLo);
8835 SDValue Cross1 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XLo, YHi);
8836 SDValue Cross2 = DAG.getNode(ISD::CLMUL, DL, HalfVT, XHi, YLo);
8837 SDValue Cross = DAG.getNode(ISD::XOR, DL, HalfVT, Cross1, Cross2);
8838 SDValue Hi = DAG.getNode(ISD::XOR, DL, HalfVT, LoH, Cross);
8839
8840 // Reassemble: Result = ZExt(Lo) | (AnyExt(Hi) << HalfBW)
8841 SDValue LoExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo);
8842 SDValue HiExt = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Hi);
8843 SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, HiExt, ShAmt);
8844 return DAG.getNode(ISD::OR, DL, VT, LoExt, HiShifted);
8845 }
8846 }
8847
8848 // Strategy 2: Promote to double-element-width CLMUL.
8849 // CLMUL(X, Y) = Trunc(CLMUL(AnyExt(X), AnyExt(Y)))
8850 {
8851 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8852 if (isTypeLegal(ExtVT) && isOperationLegalOrCustom(ISD::CLMUL, ExtVT)) {
8853 // If CLMUL on ExtVT is Custom (not Legal), the target may
8854 // scalarize it, costing O(NumElements) scalar ops. The bit-by-bit
8855 // fallback costs O(BW) vectorized iterations. Only widen when
8856 // element count is small enough that scalarization is cheaper.
8857 unsigned NumElts = VT.getVectorMinNumElements();
8858 if (isOperationLegal(ISD::CLMUL, ExtVT) || NumElts < BW) {
8859 SDValue XExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, X);
8860 SDValue YExt = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, Y);
8861 SDValue Mul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8862 return DAG.getNode(ISD::TRUNCATE, DL, VT, Mul);
8863 }
8864 }
8865 }
8866
8867 // Strategy 3: Widen element count (pad with undef, do CLMUL on wider
8868 // vector, extract lower result). CLMUL is element-wise, so upper
8869 // (undef) lanes don't affect the lower results.
8870 // e.g. v4i16 => pad to v8i16 => halve to v8i8 PMUL => extract v4i16.
8871 if (auto EC = VT.getVectorElementCount(); EC.isFixed()) {
8872 EVT WideVT = EVT::getVectorVT(Ctx, VT.getVectorElementType(), EC * 2);
8873 if (isTypeLegal(WideVT) && canNarrowCLMULToLegal(*this, Ctx, WideVT)) {
8874 SDValue Undef = DAG.getUNDEF(WideVT);
8875 SDValue XWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8876 X, DAG.getVectorIdxConstant(0, DL));
8877 SDValue YWide = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, Undef,
8878 Y, DAG.getVectorIdxConstant(0, DL));
8879 SDValue WideRes = DAG.getNode(ISD::CLMUL, DL, WideVT, XWide, YWide);
8880 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WideRes,
8881 DAG.getVectorIdxConstant(0, DL));
8882 }
8883 }
8884 }
8885
8886 // Special case: clmul(X, ~0) is equivalent to a "parallel prefix XOR" or
8887 // "bitwise parity" operation.
8889 SDValue R = X;
8890 for (unsigned I = 1; I < BW; I <<= 1) {
8891 SDValue ShAmt = DAG.getShiftAmountConstant(I, VT, DL);
8892 SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, R, ShAmt);
8893 R = DAG.getNode(ISD::XOR, DL, VT, R, Shifted);
8894 }
8895 return R;
8896 }
8897
8898 // NOTE: If you change this expansion, please update the cost model
8899 // calculation in BasicTTIImpl::getTypeBasedIntrinsicInstrCost for
8900 // Intrinsic::clmul.
8901
8902 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
8903
8904 SDValue Res = DAG.getConstant(0, DL, VT);
8905 for (unsigned I = 0; I < BW; ++I) {
8906 SDValue ShiftAmt = DAG.getShiftAmountConstant(I, VT, DL);
8907 SDValue Mask = DAG.getConstant(APInt::getOneBitSet(BW, I), DL, VT);
8908 SDValue YMasked = DAG.getNode(ISD::AND, DL, VT, Y, Mask);
8909
8910 // For targets with a fast bit test instruction (e.g., x86 BT) or without
8911 // multiply, use a shift-based expansion to avoid expensive MUL
8912 // instructions.
8913 SDValue Part;
8914 if (!hasBitTest(Y, ShiftAmt) &&
8917 Part = DAG.getNode(ISD::MUL, DL, VT, X, YMasked);
8918 } else {
8919 // Canonical bit test: (Y & (1 << I)) != 0
8920 SDValue Zero = DAG.getConstant(0, DL, VT);
8921 SDValue Cond = DAG.getSetCC(DL, SetCCVT, YMasked, Zero, ISD::SETEQ);
8922 SDValue XShifted = DAG.getNode(ISD::SHL, DL, VT, X, ShiftAmt);
8923 Part = DAG.getSelect(DL, VT, Cond, Zero, XShifted);
8924 }
8925 Res = DAG.getNode(ISD::XOR, DL, VT, Res, Part);
8926 }
8927 return Res;
8928 }
8929 case ISD::CLMULR:
8930 // If we have CLMUL/CLMULH, merge the shifted results to form CLMULR.
8933 SDValue Lo = DAG.getNode(ISD::CLMUL, DL, VT, X, Y);
8934 SDValue Hi = DAG.getNode(ISD::CLMULH, DL, VT, X, Y);
8935 Lo = DAG.getNode(ISD::SRL, DL, VT, Lo,
8936 DAG.getShiftAmountConstant(BW - 1, VT, DL));
8937 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
8938 DAG.getShiftAmountConstant(1, VT, DL));
8939 return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
8940 }
8941 [[fallthrough]];
8942 case ISD::CLMULH: {
8943 EVT ExtVT = VT.widenIntegerElementType(Ctx);
8944 // Use bitreverse-based lowering (CLMULR/H = rev(CLMUL(rev,rev)) >> S)
8945 // when any of these hold:
8946 // (a) ZERO_EXTEND to ExtVT or SRL on ExtVT isn't legal.
8947 // (b) CLMUL is legal on VT but not on ExtVT (e.g. v8i8 on AArch64).
8948 // (c) CLMUL on ExtVT isn't legal, but CLMUL on VT can be efficiently
8949 // expanded via halving/widening to reach legal CLMUL. The bitreverse
8950 // path creates CLMUL(VT) which will be expanded efficiently. The
8951 // promote path would create CLMUL(ExtVT) => halving => CLMULH(VT),
8952 // causing a cycle.
8953 // Note: when CLMUL is legal on ExtVT, the zext => CLMUL(ExtVT) => shift
8954 // => trunc path is preferred over the bitreverse path, as it avoids the
8955 // cost of 3 bitreverse operations.
8960 canNarrowCLMULToLegal(*this, Ctx, VT)))) {
8961 SDValue XRev = DAG.getNode(ISD::BITREVERSE, DL, VT, X);
8962 SDValue YRev = DAG.getNode(ISD::BITREVERSE, DL, VT, Y);
8963 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, VT, XRev, YRev);
8964 SDValue Res = DAG.getNode(ISD::BITREVERSE, DL, VT, ClMul);
8965 if (Opcode == ISD::CLMULH)
8966 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
8967 DAG.getShiftAmountConstant(1, VT, DL));
8968 return Res;
8969 }
8970 SDValue XExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, X);
8971 SDValue YExt = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Y);
8972 SDValue ClMul = DAG.getNode(ISD::CLMUL, DL, ExtVT, XExt, YExt);
8973 unsigned ShAmt = Opcode == ISD::CLMULR ? BW - 1 : BW;
8974 SDValue HiBits = DAG.getNode(ISD::SRL, DL, ExtVT, ClMul,
8975 DAG.getShiftAmountConstant(ShAmt, ExtVT, DL));
8976 return DAG.getNode(ISD::TRUNCATE, DL, VT, HiBits);
8977 }
8978 }
8979 llvm_unreachable("Expected CLMUL, CLMULR, or CLMULH");
8980}
8981
8983 SelectionDAG &DAG) const {
8984 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8985 EVT VT = Node->getValueType(0);
8986 unsigned VTBits = VT.getScalarSizeInBits();
8987 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8988
8989 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8990 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8991 SDValue ShOpLo = Node->getOperand(0);
8992 SDValue ShOpHi = Node->getOperand(1);
8993 SDValue ShAmt = Node->getOperand(2);
8994 EVT ShAmtVT = ShAmt.getValueType();
8995 EVT ShAmtCCVT =
8996 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8997 SDLoc dl(Node);
8998
8999 // ISD::FSHL and ISD::FSHR have defined overflow behavior, but ISD::SHL and
9000 // ISD::SRA/SRL nodes do not. Insert an AND to be safe; it's usually
9001 // optimized away during isel.
9002 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
9003 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
9004 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
9005 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
9006 : DAG.getConstant(0, dl, VT); // Fill value for large shifts: sign bits of Hi for SRA, zero otherwise.
9007
9008 SDValue Tmp2, Tmp3;
9009 if (IsSHL) {
9010 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt); // Funnel shift combining Hi and Lo.
9011 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
9012 } else {
9013 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt); // Funnel shift combining Hi and Lo.
9014 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
9015 }
9016
9017 // If the shift amount is greater than or equal to the width of a part, we
9018 // don't use the result from the FSHL/FSHR. Insert a test and select the
9019 // appropriate values for large shift amounts.
9020 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
9021 DAG.getConstant(VTBits, dl, ShAmtVT)); // VTBits is a power of two (asserted above), so this isolates one bit.
9022 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
9023 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
9024
9025 if (IsSHL) {
9026 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9027 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9028 } else {
9029 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
9030 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
9031 }
9032}
9033
9035 SelectionDAG &DAG) const {
9036 // This implements llvm.canonicalize.f* by multiplication with 1.0, as
9037 // suggested in
9038 // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
9039 // It uses strict_fp operations even outside a strict_fp context in order
9040 // to guarantee that the canonicalization is not optimized away by later
9041 // passes. The result chain introduced by that is intentionally ignored
9042 // since no ordering requirement is intended here.
9043 EVT VT = Node->getValueType(0);
9044 SDLoc DL(Node);
9045 SDNodeFlags Flags = Node->getFlags();
9046 Flags.setNoFPExcept(true); // Start from the node's flags and add the no-FP-exception flag.
9047 SDValue One = DAG.getConstantFP(1.0, DL, VT);
9048 SDValue Mul =
9049 DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
9050 {DAG.getEntryNode(), Node->getOperand(0), One}, Flags); // Input chain is the entry node.
9051 return Mul; // The chain result of the strict multiply is not returned.
9052}
9053
9054SDValue
9056 SelectionDAG &DAG) const {
9057 SDLoc dl(Node);
9058 EVT DstVT = Node->getValueType(0);
9059 EVT DstScalarVT = DstVT.getScalarType();
9060
9061 SDValue IntVal = Node->getOperand(0);
9062 const uint64_t SemEnum = Node->getConstantOperandVal(1);
9063 const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
9064
9065 // Supported source formats.
9066 switch (Sem) {
9072 break;
9073 default:
9074 DAG.getContext()->emitError("CONVERT_FROM_ARBITRARY_FP: not implemented "
9075 "source format (semantics enum " +
9076 Twine(SemEnum) + ")");
9077 return SDValue();
9078 }
9079
9080 const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(Sem);
9081 const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
9082 const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
9083 const unsigned SrcMant = SrcPrecision - 1; // Width of the mantissa field.
9084 const unsigned SrcExp = SrcBits - SrcMant - 1; // Width of the exponent field (one bit is the sign).
9085 const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);
9086 const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;
9087
9088 // Destination format parameters.
9089 const fltSemantics &DstSem = DstScalarVT.getFltSemantics();
9090 const unsigned DstBits = APFloat::getSizeInBits(DstSem);
9091 const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
9092 const unsigned DstExpBits = DstBits - DstMant - 1;
9093 const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
9094 const int DstBias = 1 - DstMinExp;
9095 const uint64_t DstExpAllOnes = (1ULL << DstExpBits) - 1; // All-ones value of the destination exponent field.
9096
9097 // Work in an integer type matching the destination float width.
9098 EVT IntScalarVT = EVT::getIntegerVT(*DAG.getContext(), DstBits);
9099 EVT IntVT = DstVT.isVector()
9100 ? EVT::getVectorVT(*DAG.getContext(), IntScalarVT,
9101 DstVT.getVectorElementCount())
9102 : IntScalarVT;
9103
9104 SDValue Src = DAG.getZExtOrTrunc(IntVal, dl, IntVT); // Zero-extend or truncate the integer input to the working type.
9105
9106 EVT SetCCVT =
9107 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), IntVT);
9108
9109 SDValue Zero = DAG.getConstant(0, dl, IntVT);
9110 SDValue One = DAG.getConstant(1, dl, IntVT);
9111
9112 // Extract bit fields.
9113 const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
9114 const uint64_t ExpMask = (1ULL << SrcExp) - 1;
9115
9116 SDValue MantField = DAG.getNode(ISD::AND, dl, IntVT, Src,
9117 DAG.getConstant(MantMask, dl, IntVT));
9118
9119 SDValue ExpField =
9120 DAG.getNode(ISD::AND, dl, IntVT,
9121 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9122 DAG.getShiftAmountConstant(SrcMant, IntVT, dl)),
9123 DAG.getConstant(ExpMask, dl, IntVT));
9124
9125 SDValue SignBit =
9126 DAG.getNode(ISD::SRL, dl, IntVT, Src,
9127 DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));
9128
9129 SDValue SignShifted =
9130 DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
9131 DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl)); // Sign moved to the destination's top bit.
9132
9133 // Classify the input.
9134 SDValue ExpAllOnes = DAG.getConstant(ExpMask, dl, IntVT);
9135 SDValue IsExpAllOnes =
9136 DAG.getSetCC(dl, SetCCVT, ExpField, ExpAllOnes, ISD::SETEQ);
9137 SDValue IsExpZero = DAG.getSetCC(dl, SetCCVT, ExpField, Zero, ISD::SETEQ);
9138 SDValue IsMantZero = DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETEQ);
9139 SDValue IsMantNonZero =
9140 DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETNE);
9141
9142 SDValue IsNaN;
9143 if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
9144 IsNaN = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
9145 } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
9146 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantNonZero);
9147 } else {
9149 SDValue MantAllOnes = DAG.getConstant(MantMask, dl, IntVT);
9150 SDValue IsMantAllOnes =
9151 DAG.getSetCC(dl, SetCCVT, MantField, MantAllOnes, ISD::SETEQ);
9152 IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantAllOnes); // Remaining behaviors: NaN only when exponent and mantissa are both all ones.
9153 }
9154
9155 SDValue IsInf;
9156 if (NFBehavior == fltNonfiniteBehavior::IEEE754)
9157 IsInf = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantZero);
9158 else
9159 IsInf = DAG.getBoolConstant(false, dl, SetCCVT, IntVT); // Infinity is only recognized for IEEE754 behavior.
9160
9161 SDValue IsZero = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantZero);
9162 SDValue IsDenorm =
9163 DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantNonZero);
9164
9165 // Normal value conversion: rebias the exponent and widen the mantissa.
9166 const int BiasAdjust = DstBias - SrcBias;
9167 SDValue NormDstExp =
9168 DAG.getNode(ISD::ADD, dl, IntVT, ExpField,
9169 DAG.getConstant(APInt(DstBits, BiasAdjust, true), dl, IntVT));
9170
9171 SDValue NormDstMant;
9172 if (DstMant > SrcMant) {
9173 SDValue NormDstMantShift =
9174 DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
9175 NormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, MantField, NormDstMantShift);
9176 } else {
9177 NormDstMant = MantField; // No shift is applied when DstMant <= SrcMant.
9178 }
9179
9180 SDValue DstMantShift = DAG.getShiftAmountConstant(DstMant, IntVT, dl);
9181 SDValue NormExpShifted =
9182 DAG.getNode(ISD::SHL, dl, IntVT, NormDstExp, DstMantShift);
9183 SDValue NormResult =
9184 DAG.getNode(ISD::OR, dl, IntVT,
9185 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted),
9186 NormDstMant);
9187
9188 // Denormal value conversion: renormalize around the mantissa's leading one.
9189 SDValue DenormResult;
9190 {
9191 const unsigned IntVTBits = DstBits;
9192 SDValue LeadingZeros =
9193 DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, IntVT, MantField); // DenormResult is only selected under IsDenorm, i.e. when MantField is non-zero.
9194
9195 const int DenormExpConst =
9196 (int)IntVTBits + DstBias - SrcBias - (int)SrcMant;
9197 SDValue DenormDstExp = DAG.getNode(
9198 ISD::SUB, dl, IntVT,
9199 DAG.getConstant(APInt(DstBits, DenormExpConst, true), dl, IntVT),
9200 LeadingZeros);
9201
9202 SDValue MantMSB =
9203 DAG.getNode(ISD::SUB, dl, IntVT,
9204 DAG.getConstant(IntVTBits - 1, dl, IntVT), LeadingZeros); // Bit index of the mantissa's leading one.
9205
9206 SDValue LeadingOne = DAG.getNode(ISD::SHL, dl, IntVT, One, MantMSB);
9207 SDValue Frac = DAG.getNode(ISD::XOR, dl, IntVT, MantField, LeadingOne); // Clear the leading one, keeping the bits below it.
9208
9209 const unsigned ShiftSub = IntVTBits - 1 - DstMant;
9210 SDValue ShiftAmount = DAG.getNode(ISD::SUB, dl, IntVT, LeadingZeros,
9211 DAG.getConstant(ShiftSub, dl, IntVT)); // Equals DstMant - MantMSB.
9212
9213 SDValue DenormDstMant = DAG.getNode(ISD::SHL, dl, IntVT, Frac, ShiftAmount);
9214
9215 SDValue DenormExpShifted =
9216 DAG.getNode(ISD::SHL, dl, IntVT, DenormDstExp, DstMantShift);
9217 DenormResult = DAG.getNode(
9218 ISD::OR, dl, IntVT,
9219 DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
9220 DenormDstMant);
9221 }
9222
9223 SDValue FiniteResult =
9224 DAG.getSelect(dl, IntVT, IsDenorm, DenormResult, NormResult);
9225
9226 const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0; // Top mantissa bit of the destination, if it has one.
9227 SDValue NaNResult =
9228 DAG.getConstant((DstExpAllOnes << DstMant) | QNaNBit, dl, IntVT); // The source sign is not ORed into the NaN result.
9229
9230 SDValue InfResult =
9231 DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
9232 DAG.getConstant(DstExpAllOnes << DstMant, dl, IntVT));
9233
9234 SDValue ZeroResult = SignShifted;
9235
9236 SDValue Result = FiniteResult; // Each select below overrides the ones before it: NaN, then Inf, then zero.
9237 Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
9238 Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
9239 Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);
9240
9241 return DAG.getNode(ISD::BITCAST, dl, DstVT, Result);
9242}
9243
9245 SelectionDAG &DAG) const {
9246 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9247 SDValue Src = Node->getOperand(OpNo);
9248 EVT SrcVT = Src.getValueType();
9249 EVT DstVT = Node->getValueType(0);
9250 SDLoc dl(SDValue(Node, 0));
9251
9252 // FIXME: Only f32 to i64 conversions are supported.
9253 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
9254 return false;
9255
9256 if (Node->isStrictFPOpcode())
9257 // When a NaN is converted to an integer a trap is allowed. We can't
9258 // use this expansion here because it would eliminate that trap. Other
9259 // traps are also allowed and cannot be eliminated. See
9260 // IEEE 754-2008 sec 5.8.
9261 return false;
9262
9263 // Expand f32 -> i64 conversion
9264 // This algorithm comes from compiler-rt's implementation of fixsfdi:
9265 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
9266 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
9267 EVT IntVT = SrcVT.changeTypeToInteger();
9268 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
9269
9270 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); // f32 exponent field, bits [30:23].
9271 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); // Lowest exponent bit position.
9272 SDValue Bias = DAG.getConstant(127, dl, IntVT);
9273 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
9274 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
9275 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); // f32 mantissa field, bits [22:0].
9276
9277 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
9278
9279 SDValue ExponentBits = DAG.getNode(
9280 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
9281 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
9282 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); // Unbiased exponent.
9283
9284 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
9285 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
9286 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT)); // All-ones if the sign bit is set, otherwise zero.
9287 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
9288
9289 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
9290 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
9291 DAG.getConstant(0x00800000, dl, IntVT)); // Mantissa with bit 23 (the implicit leading one) set.
9292
9293 R = DAG.getZExtOrTrunc(R, dl, DstVT);
9294
9295 R = DAG.getSelectCC(
9296 dl, Exponent, ExponentLoBit,
9297 DAG.getNode(ISD::SHL, dl, DstVT, R,
9298 DAG.getZExtOrTrunc(
9299 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
9300 dl, IntShVT)),
9301 DAG.getNode(ISD::SRL, dl, DstVT, R,
9302 DAG.getZExtOrTrunc(
9303 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
9304 dl, IntShVT)),
9305 ISD::SETGT); // Shift left when Exponent > 23, otherwise shift right.
9306
9307 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
9308 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign); // (R ^ Sign) - Sign negates R when Sign is all-ones.
9309
9310 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
9311 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT); // A negative unbiased exponent yields 0.
9312 return true;
9313}
9314
9316 SDValue &Chain,
9317 SelectionDAG &DAG) const {
9318 SDLoc dl(SDValue(Node, 0));
9319 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
9320 SDValue Src = Node->getOperand(OpNo);
9321
9322 EVT SrcVT = Src.getValueType();
9323 EVT DstVT = Node->getValueType(0);
9324 EVT SetCCVT =
9325 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
9326 EVT DstSetCCVT =
9327 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
9328
9329 // Only expand vector types if we have the appropriate vector bit operations.
9330 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
9332 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
9334 return false;
9335
9336 // If the maximum float value is smaller than the signed integer range,
9337 // the destination signmask can't be represented by the float, so we can
9338 // just use FP_TO_SINT directly.
9339 const fltSemantics &APFSem = SrcVT.getFltSemantics();
9340 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
9341 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
9343 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
9344 if (Node->isStrictFPOpcode()) {
9345 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9346 { Node->getOperand(0), Src });
9347 Chain = Result.getValue(1);
9348 } else
9349 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9350 return true;
9351 }
9352
9353 // Don't expand it if there isn't a cheap fsub instruction.
9355 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
9356 return false;
9357
9358 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); // APF was set from SignMask by convertFromAPInt above.
9359 SDValue Sel;
9360
9361 if (Node->isStrictFPOpcode()) {
9362 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
9363 Node->getOperand(0), /*IsSignaling*/ true);
9364 Chain = Sel.getValue(1);
9365 } else {
9366 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
9367 }
9368
9369 bool Strict = Node->isStrictFPOpcode() ||
9370 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
9371
9372 if (Strict) {
9373 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
9374 // signmask then offset (the result of which should be fully representable).
9375 // Sel = Src < 0x8000000000000000
9376 // FltOfs = select Sel, 0, 0x8000000000000000
9377 // IntOfs = select Sel, 0, 0x8000000000000000
9378 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
9379
9380 // TODO: Should any fast-math-flags be set for the FSUB?
9381 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
9382 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
9383 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9384 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
9385 DAG.getConstant(0, dl, DstVT),
9386 DAG.getConstant(SignMask, dl, DstVT));
9387 SDValue SInt;
9388 if (Node->isStrictFPOpcode()) {
9389 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
9390 { Chain, Src, FltOfs });
9391 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
9392 { Val.getValue(1), Val });
9393 Chain = SInt.getValue(1); // Output chain comes from the final strict conversion.
9394 } else {
9395 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
9396 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
9397 }
9398 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
9399 } else {
9400 // Expand based on maximum range of FP_TO_SINT:
9401 // True = fp_to_sint(Src)
9402 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
9403 // Result = select (Src < 0x8000000000000000), True, False
9404
9405 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
9406 // TODO: Should any fast-math-flags be set for the FSUB?
9407 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
9408 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
9409 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
9410 DAG.getConstant(SignMask, dl, DstVT)); // The addition of the sign mask is done with XOR.
9411 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
9412 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
9413 }
9414 return true;
9415}
9416
9418 SDValue &Chain, SelectionDAG &DAG) const {
9419 // This transform is not correct for converting 0 when rounding mode is set
9420 // to round toward negative infinity which will produce -0.0. So disable
9421 // under strictfp.
9422 if (Node->isStrictFPOpcode())
9423 return false;
9424
9425 SDValue Src = Node->getOperand(0);
9426 EVT SrcVT = Src.getValueType();
9427 EVT DstVT = Node->getValueType(0);
9428
9429 // If the input is known to be non-negative and SINT_TO_FP is legal then use
9430 // it.
9431 if (Node->getFlags().hasNonNeg() &&
9433 Result =
9434 DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
9435 return true;
9436 }
9437
9438 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
9439 return false;
9440
9441 // Only expand vector types if we have the appropriate vector bit
9442 // operations.
9443 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
9448 return false;
9449
9450 SDLoc dl(SDValue(Node, 0));
9451
9452 // Implementation of unsigned i64 to f64 following the algorithm in
9453 // __floatundidf in compiler-rt. This implementation performs rounding
9454 // correctly in all rounding modes with the exception of converting 0
9455 // when rounding toward negative infinity. In that case the fsub will
9456 // produce -0.0. This will be added to +0.0 and produce -0.0 which is
9457 // incorrect.
9458 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
9459 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
9460 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
9461 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
9462 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
9463 SDValue HiShift = DAG.getShiftAmountConstant(32, SrcVT, dl);
9464
9465 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); // Low 32 bits of Src.
9466 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); // High 32 bits of Src.
9467 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52); // Bit pattern of the double 2^52 + Lo.
9468 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); // Bit pattern of the double 2^84 + Hi * 2^32.
9469 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
9470 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
9471 SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
9472 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); // The 2^52 carried by LoFlt cancels the one subtracted in HiSub.
9473 return true;
9474}
9475
9476SDValue
9478 SelectionDAG &DAG) const {
9479 unsigned Opcode = Node->getOpcode();
9480 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
9481 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
9482 "Wrong opcode");
9483
9484 if (Node->getFlags().hasNoNaNs()) { // A select is only formed when the node carries the no-NaNs flag.
9485 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; // NOTE(review): STRICT_FMINNUM also maps to SETGT here - confirm this is intended.
9486 EVT VT = Node->getValueType(0);
9487 if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
9489 VT.isVector())
9490 return SDValue();
9491 SDValue Op1 = Node->getOperand(0);
9492 SDValue Op2 = Node->getOperand(1);
9493 return DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred,
9494 Node->getFlags()); // Pred(Op1, Op2) ? Op1 : Op2
9495 }
9496
9497 return SDValue(); // Empty value: no select was created.
9498}
9499
9501 SelectionDAG &DAG) const {
9502 if (SDValue Expanded = expandVectorNaryOpBySplitting(Node, DAG))
9503 return Expanded;
9504
9505 EVT VT = Node->getValueType(0);
9506 if (VT.isScalableVector())
9508 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
9509
9510 SDLoc dl(Node);
9511 unsigned NewOp =
9513
9514 if (isOperationLegalOrCustom(NewOp, VT)) {
9515 SDValue Quiet0 = Node->getOperand(0);
9516 SDValue Quiet1 = Node->getOperand(1);
9517
9518 if (!Node->getFlags().hasNoNaNs()) {
9519 // Insert canonicalizes if it's possible we need to quiet to get correct
9520 // sNaN behavior.
9521 if (!DAG.isKnownNeverSNaN(Quiet0)) {
9522 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
9523 Node->getFlags());
9524 }
9525 if (!DAG.isKnownNeverSNaN(Quiet1)) {
9526 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
9527 Node->getFlags());
9528 }
9529 }
9530
9531 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
9532 }
9533
9534 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM, use that
9535 // instead if there are no NaNs.
9536 if (Node->getFlags().hasNoNaNs() ||
9537 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
9538 DAG.isKnownNeverNaN(Node->getOperand(1)))) {
9539 unsigned IEEE2018Op =
9540 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
9541 if (isOperationLegalOrCustom(IEEE2018Op, VT))
9542 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
9543 Node->getOperand(1), Node->getFlags());
9544 }
9545
9547 return SelCC;
9548
9549 return SDValue(); // Empty value: none of the strategies above applied.
9550}
9551
9553 SelectionDAG &DAG) const {
9554 if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
9555 return Expanded;
9556
9557 SDLoc DL(N);
9558 SDValue LHS = N->getOperand(0);
9559 SDValue RHS = N->getOperand(1);
9560 unsigned Opc = N->getOpcode();
9561 EVT VT = N->getValueType(0);
9562 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9563 bool IsMax = Opc == ISD::FMAXIMUM;
9564 SDNodeFlags Flags = N->getFlags();
9565
9566 // First, implement a comparison that does not propagate NaN. If no native
9567 // fmin or fmax is available, use a plain select with setcc instead.
9569 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9570 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
9571
9572 // FIXME: We should probably define fminnum/fmaxnum variants with correct
9573 // signed zero behavior.
9574 bool MinMaxMustRespectOrderedZero = false;
9575
9576 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
9577 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
9578 MinMaxMustRespectOrderedZero = true; // Skips the signed-zero fixup at the end.
9579 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
9580 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
9581 } else {
9583 return DAG.UnrollVectorOp(N);
9584
9585 // NaN (if any) will be propagated later, so orderedness doesn't matter.
9586 SDValue Compare =
9587 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
9588 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
9589 }
9590
9591 // Propagate a NaN from either operand.
9592 if (!N->getFlags().hasNoNaNs() &&
9593 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
9594 ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
9596 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
9597 DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
9598 }
9599
9600 // fminimum/fmaximum require -0.0 to be treated as less than +0.0.
9601 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
9602 !DAG.isKnownNeverLogicalZero(RHS) && !DAG.isKnownNeverLogicalZero(LHS)) {
9603 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9604 DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
9605 SDValue TestZero =
9606 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32); // Preferred zero: +0.0 for max, -0.0 for min.
9607 SDValue LCmp = DAG.getSelect(
9608 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
9609 MinMax, Flags);
9610 SDValue RCmp = DAG.getSelect(
9611 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
9612 LCmp, Flags);
9613 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags); // The fixup only applies when the computed result compares equal to zero.
9614 }
9615
9616 return MinMax;
9617}
9618
9620 SelectionDAG &DAG) const {
9621 SDLoc DL(Node);
9622 SDValue LHS = Node->getOperand(0);
9623 SDValue RHS = Node->getOperand(1);
9624 unsigned Opc = Node->getOpcode();
9625 EVT VT = Node->getValueType(0);
9626 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9627 bool IsMax = Opc == ISD::FMAXIMUMNUM;
9628 SDNodeFlags Flags = Node->getFlags();
9629
9630 unsigned NewOp =
9632
9633 if (isOperationLegalOrCustom(NewOp, VT)) {
9634 if (!Flags.hasNoNaNs()) {
9635 // Insert canonicalizes if it's possible we need to quiet to get correct
9636 // sNaN behavior.
9637 if (!DAG.isKnownNeverSNaN(LHS)) {
9638 LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
9639 }
9640 if (!DAG.isKnownNeverSNaN(RHS)) {
9641 RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
9642 }
9643 }
9644
9645 return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
9646 }
9647
9648 // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has the
9649 // same behavior in all other cases, +0.0 vs -0.0 included.
9650 if (Flags.hasNoNaNs() ||
9651 (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
9652 unsigned IEEE2019Op =
9654 if (isOperationLegalOrCustom(IEEE2019Op, VT))
9655 return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
9656 }
9657
9658 // FMINNUM/FMAXNUM return qNaN if either operand is sNaN, and they may return
9659 // either one for +0.0 vs -0.0.
9660 if ((Flags.hasNoNaNs() ||
9661 (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
9662 (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9663 DAG.isKnownNeverLogicalZero(RHS))) {
9664 unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
9665 if (isOperationLegalOrCustom(IEEE2008Op, VT))
9666 return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
9667 }
9668
9669 if (VT.isVector() &&
9672 return DAG.UnrollVectorOp(Node);
9673
9674 // If only one operand is NaN, override it with the other operand.
9675 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
9676 LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO); // LHS unordered with itself (NaN) ? RHS : LHS
9677 }
9678 if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
9679 RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO); // Uses the LHS already updated above.
9680 }
9681
9682 // Always prefer RHS if equal.
9683 SDValue MinMax =
9684 DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
9685
9686 // TODO: We need quiet sNaN if strictfp.
9687
9688 // Fixup signed zero behavior.
9689 if (Flags.hasNoSignedZeros() || DAG.isKnownNeverLogicalZero(LHS) ||
9690 DAG.isKnownNeverLogicalZero(RHS)) {
9691 return MinMax;
9692 }
9693 SDValue TestZero =
9694 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32); // Preferred zero: +0.0 for max, -0.0 for min.
9695 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
9696 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
9697 EVT IntVT = VT.changeTypeToInteger();
9698 EVT FloatVT = VT.changeElementType(*DAG.getContext(), MVT::f32);
9699 SDValue LHSTrunc = LHS;
9701 LHSTrunc = DAG.getNode(ISD::FP_ROUND, DL, FloatVT, LHS,
9702 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
9703 }
9704 // It's OK to select from LHS and MinMax with only one ISD::IS_FPCLASS, as
9705 // we preferred RHS when generating MinMax if the operands are equal.
9706 SDValue RetZero = DAG.getSelect(
9707 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHSTrunc, TestZero), LHS,
9708 MinMax, Flags);
9709 return DAG.getSelect(DL, VT, IsZero, RetZero, MinMax, Flags);
9710}
9711
9712/// Returns true if this FPClassTest can be performed with an ordered
9713/// fcmp to 0, and false if it's an unordered fcmp to 0. Returns
9714/// std::nullopt if it cannot be performed as a compare with 0.
9715static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
9716 const fltSemantics &Semantics,
9717 const MachineFunction &MF) {
9718 FPClassTest OrderedMask = Test & ~fcNan; // Requested classes with the NaN bits removed.
9719 FPClassTest NanTest = Test & fcNan; // Requested NaN classes only.
9720 bool IsOrdered = NanTest == fcNone;
9721 bool IsUnordered = NanTest == fcNan;
9722
9723 // Skip cases that are testing for only a qnan or snan.
9724 if (!IsOrdered && !IsUnordered)
9725 return std::nullopt;
9726
9727 if (OrderedMask == fcZero &&
9728 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE) // Zero-only test, accepted when the input denormal mode is IEEE.
9729 return IsOrdered;
9730 if (OrderedMask == (fcZero | fcSubnormal) &&
9731 MF.getDenormalMode(Semantics).inputsAreZero()) // Zero-or-subnormal test, accepted when denormal inputs are treated as zero.
9732 return IsOrdered;
9733 return std::nullopt;
9734}
9735
// NOTE(review): the line opening this definition (return type, name and the
// leading parameters ResultVT/Op) is absent from this listing, and the
// embedded numbering also skips 9767, 9804, 9814, 9815, 9817 and 9963.
// Presumably TargetLowering::expandIS_FPCLASS -- confirm against upstream.
//
// Builds a ResultVT value that is true when Op belongs to one of the
// floating-point classes selected by OrigTestMask. When
// Flags.hasNoFPExcept() holds (together with a condition on the missing line
// 9767) some masks are lowered to floating-point compares; all remaining
// masks are tested with integer operations on the bit pattern of Op.
9737 const FPClassTest OrigTestMask,
9738 SDNodeFlags Flags, const SDLoc &DL,
9739 SelectionDAG &DAG) const {
9740 EVT OperandVT = Op.getValueType();
9741 assert(OperandVT.isFloatingPoint());
9742 FPClassTest Test = OrigTestMask;
9743
9744 // Degenerate cases: an empty mask never matches, a full mask always does.
9745 if (Test == fcNone)
9746 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
9747 if (Test == fcAllFlags)
9748 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
9749
9750 // PPC double double is a pair of doubles, of which the higher part determines
9751 // the value class.
9752 if (OperandVT == MVT::ppcf128) {
9753 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
9754 DAG.getConstant(1, DL, MVT::i32));
9755 OperandVT = MVT::f64;
9756 }
9757
9758 // Floating-point type properties.
9759 EVT ScalarFloatVT = OperandVT.getScalarType();
9760 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
9761 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
9762 bool IsF80 = (ScalarFloatVT == MVT::f80);
9763
9764 // Some checks can be implemented using float comparisons, if floating point
9765 // exceptions are ignored.
9766 if (Flags.hasNoFPExcept() &&
9768 FPClassTest FPTestMask = Test;
9769 bool IsInvertedFP = false;
9770
9771 if (FPClassTest InvertedFPCheck =
9772 invertFPClassTestIfSimpler(FPTestMask, true)) {
9773 FPTestMask = InvertedFPCheck;
9774 IsInvertedFP = true;
9775 }
9776
// When the mask was inverted the compares below use the negated predicates,
// so no separate inversion of the result is emitted on this path.
9777 ISD::CondCode OrderedCmpOpcode = IsInvertedFP ? ISD::SETUNE : ISD::SETOEQ;
9778 ISD::CondCode UnorderedCmpOpcode = IsInvertedFP ? ISD::SETONE : ISD::SETUEQ;
9779
9780 // See if we can fold an | fcNan into an unordered compare.
9781 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
9782
9783 // Can't fold the ordered check if we're only testing for snan or qnan
9784 // individually.
9785 if ((FPTestMask & fcNan) != fcNan)
9786 OrderedFPTestMask = FPTestMask;
9787
9788 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
9789
9790 if (std::optional<bool> IsCmp0 =
9791 isFCmpEqualZero(FPTestMask, Semantics, DAG.getMachineFunction());
9792 IsCmp0 && (isCondCodeLegalOrCustom(
9793 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
9794 OperandVT.getScalarType().getSimpleVT()))) {
9795
9796 // If denormals could be implicitly treated as 0, this is not equivalent
9797 // to a compare with 0 since it will also be true for denormals.
9798 return DAG.getSetCC(DL, ResultVT, Op,
9799 DAG.getConstantFP(0.0, DL, OperandVT),
9800 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
9801 }
9802
// isnan(x) --> x unordered with itself (ordered with itself when inverted).
9803 if (FPTestMask == fcNan &&
9805 OperandVT.getScalarType().getSimpleVT()))
9806 return DAG.getSetCC(DL, ResultVT, Op, Op,
9807 IsInvertedFP ? ISD::SETO : ISD::SETUO);
9808
9809 bool IsOrderedInf = FPTestMask == fcInf;
9810 if ((FPTestMask == fcInf || FPTestMask == (fcInf | fcNan)) &&
9811 isCondCodeLegalOrCustom(IsOrderedInf ? OrderedCmpOpcode
9812 : UnorderedCmpOpcode,
9813 OperandVT.getScalarType().getSimpleVT()) &&
9816 (OperandVT.isVector() &&
9818 // isinf(x) --> fabs(x) == inf
9819 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9820 SDValue Inf =
9821 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9822 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
9823 IsOrderedInf ? OrderedCmpOpcode : UnorderedCmpOpcode);
9824 }
9825
9826 if ((OrderedFPTestMask == fcPosInf || OrderedFPTestMask == fcNegInf) &&
9827 isCondCodeLegalOrCustom(IsOrdered ? OrderedCmpOpcode
9828 : UnorderedCmpOpcode,
9829 OperandVT.getSimpleVT())) {
9830 // isposinf(x) --> x == inf
9831 // isneginf(x) --> x == -inf
9832 // isposinf(x) || nan --> x u== inf
9833 // isneginf(x) || nan --> x u== -inf
9834
9835 SDValue Inf = DAG.getConstantFP(
9836 APFloat::getInf(Semantics, OrderedFPTestMask == fcNegInf), DL,
9837 OperandVT);
9838 return DAG.getSetCC(DL, ResultVT, Op, Inf,
9839 IsOrdered ? OrderedCmpOpcode : UnorderedCmpOpcode);
9840 }
9841
9842 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
9843 // TODO: Could handle ordered case, but it produces worse code for
9844 // x86. Maybe handle ordered if fabs is free?
9845
9846 ISD::CondCode OrderedOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9847 ISD::CondCode UnorderedOp = IsInvertedFP ? ISD::SETOGE : ISD::SETULT;
9848
9849 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
9850 OperandVT.getScalarType().getSimpleVT())) {
9851 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
9852
9853 // TODO: Maybe only makes sense if fabs is free. Integer test of
9854 // exponent bits seems better for x86.
9855 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9856 SDValue SmallestNormal = DAG.getConstantFP(
9857 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9858 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
9859 IsOrdered ? OrderedOp : UnorderedOp);
9860 }
9861 }
9862
9863 if (FPTestMask == fcNormal) {
9864 // TODO: Handle unordered
9865 ISD::CondCode IsFiniteOp = IsInvertedFP ? ISD::SETUGE : ISD::SETOLT;
9866 ISD::CondCode IsNormalOp = IsInvertedFP ? ISD::SETOLT : ISD::SETUGE;
9867
9868 if (isCondCodeLegalOrCustom(IsFiniteOp,
9869 OperandVT.getScalarType().getSimpleVT()) &&
9870 isCondCodeLegalOrCustom(IsNormalOp,
9871 OperandVT.getScalarType().getSimpleVT()) &&
9872 isFAbsFree(OperandVT)) {
9873 // isnormal(x) --> fabs(x) < infinity && !(fabs(x) < smallest_normal)
9874 SDValue Inf =
9875 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
9876 SDValue SmallestNormal = DAG.getConstantFP(
9877 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
9878
9879 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
9880 SDValue IsFinite = DAG.getSetCC(DL, ResultVT, Abs, Inf, IsFiniteOp);
9881 SDValue IsNormal =
9882 DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal, IsNormalOp);
9883 unsigned LogicOp = IsInvertedFP ? ISD::OR : ISD::AND;
9884 return DAG.getNode(LogicOp, DL, ResultVT, IsFinite, IsNormal);
9885 }
9886 }
9887 }
9888
9889 // Some checks may be represented as inversion of simpler check, for example
9890 // "inf|normal|subnormal|zero" => !"nan".
9891 bool IsInverted = false;
9892
9893 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test, false)) {
9894 Test = InvertedCheck;
9895 IsInverted = true;
9896 }
9897
9898 // In the general case use integer operations.
9899 unsigned BitSize = OperandVT.getScalarSizeInBits();
9900 EVT IntVT = OperandVT.changeElementType(
9901 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), BitSize));
9902 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
9903
9904 // Various masks.
9905 APInt SignBit = APInt::getSignMask(BitSize);
9906 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9907 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9908 const unsigned ExplicitIntBitInF80 = 63;
9909 APInt ExpMask = Inf;
9910 if (IsF80)
9911 ExpMask.clearBit(ExplicitIntBitInF80);
9912 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9913 APInt QNaNBitMask =
9914 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9915 APInt InversionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
9916
9917 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
9918 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
9919 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
9920 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
9921 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
9922 SDValue ResultInversionMask = DAG.getConstant(InversionMask, DL, ResultVT);
9923
9924 SDValue Res;
// ORs PartialRes into the accumulated Res; an empty PartialRes is ignored.
9925 const auto appendResult = [&](SDValue PartialRes) {
9926 if (PartialRes) {
9927 if (Res)
9928 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
9929 else
9930 Res = PartialRes;
9931 }
9932 };
9933
9934 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
// Builds the integer-bit test on first use and returns the cached node on
// later calls.
9935 const auto getIntBitIsSet = [&]() -> SDValue {
9936 if (!IntBitIsSetV) {
9937 APInt IntBitMask(BitSize, 0);
9938 IntBitMask.setBit(ExplicitIntBitInF80);
9939 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
9940 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
9941 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
9942 }
9943 return IntBitIsSetV;
9944 };
9945
9946 // Split the value into sign bit and absolute value.
9947 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
9948 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
9949 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
9950
9951 // Tests that involve more than one class should be processed first.
9952 SDValue PartialRes;
9953
9954 if (IsF80)
9955 ; // Detect finite numbers of f80 by checking individual classes because
9956 // they have different settings of the explicit integer bit.
9957 else if ((Test & fcFinite) == fcFinite) {
9958 // finite(V) ==> (a << 1) < (inf << 1)
9959 //
9960 // See https://github.com/llvm/llvm-project/issues/169270, this is slightly
9961 // shorter than the `finite(V) ==> abs(V) < exp_mask` formula used before.
9962
9964 "finite check requires IEEE-like FP");
9965
9966 SDValue One = DAG.getShiftAmountConstant(1, IntVT, DL);
9967 SDValue TwiceOp = DAG.getNode(ISD::SHL, DL, IntVT, OpAsInt, One);
9968 SDValue TwiceInf = DAG.getNode(ISD::SHL, DL, IntVT, ExpMaskV, One);
9969
9970 PartialRes = DAG.getSetCC(DL, ResultVT, TwiceOp, TwiceInf, ISD::SETULT);
9971 Test &= ~fcFinite;
9972 } else if ((Test & fcFinite) == fcPosFinite) {
9973 // finite(V) && V > 0 ==> V < exp_mask
9974 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
9975 Test &= ~fcPosFinite;
9976 } else if ((Test & fcFinite) == fcNegFinite) {
9977 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
9978 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
9979 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
9980 Test &= ~fcNegFinite;
9981 }
9982 appendResult(PartialRes);
9983
9984 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
9985 // fcZero | fcSubnormal => test all exponent bits are 0
9986 // TODO: Handle sign bit specific cases
9987 if (PartialCheck == (fcZero | fcSubnormal)) {
9988 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
9989 SDValue ExpIsZero =
9990 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
9991 appendResult(ExpIsZero);
9992 Test &= ~PartialCheck & fcAllFlags;
9993 }
9994 }
9995
9996 // Check for individual classes.
9997
9998 if (unsigned PartialCheck = Test & fcZero) {
9999 if (PartialCheck == fcPosZero)
10000 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
10001 else if (PartialCheck == fcZero)
10002 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
10003 else // fcNegZero: only the sign bit is set.
10004 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
10005 appendResult(PartialRes);
10006 }
10007
10008 if (unsigned PartialCheck = Test & fcSubnormal) {
10009 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
10010 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
10011 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
10012 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
10013 SDValue VMinusOneV =
10014 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
10015 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
10016 if (PartialCheck == fcNegSubnormal)
10017 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10018 appendResult(PartialRes);
10019 }
10020
10021 if (unsigned PartialCheck = Test & fcInf) {
10022 if (PartialCheck == fcPosInf)
10023 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
10024 else if (PartialCheck == fcInf)
10025 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
10026 else { // fcNegInf
10027 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
10028 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
10029 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
10030 }
10031 appendResult(PartialRes);
10032 }
10033
10034 if (unsigned PartialCheck = Test & fcNan) {
10035 APInt InfWithQnanBit = Inf | QNaNBitMask;
10036 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
10037 if (PartialCheck == fcNan) {
10038 // isnan(V) ==> abs(V) > int(inf)
10039 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10040 if (IsF80) {
10041 // Recognize unsupported values as NaNs for compatibility with glibc.
10042 // In them (exp(V)==0) == int_bit.
10043 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
10044 SDValue ExpIsZero =
10045 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
10046 SDValue IsPseudo =
10047 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
10048 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
10049 }
10050 } else if (PartialCheck == fcQNan) {
10051 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
10052 PartialRes =
10053 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
10054 } else { // fcSNan
10055 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
10056 // abs(V) < (unsigned(Inf) | quiet_bit)
10057 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
10058 SDValue IsNotQnan =
10059 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
10060 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
10061 }
10062 appendResult(PartialRes);
10063 }
10064
10065 if (unsigned PartialCheck = Test & fcNormal) {
10066 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
10067 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10068 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
10069 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
10070 APInt ExpLimit = ExpMask - ExpLSB;
10071 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
10072 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
10073 if (PartialCheck == fcNegNormal)
10074 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
10075 else if (PartialCheck == fcPosNormal) {
10076 SDValue PosSignV =
10077 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInversionMask);
10078 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
10079 }
10080 if (IsF80)
10081 PartialRes =
10082 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
10083 appendResult(PartialRes);
10084 }
10085
// No partial test was emitted, so the result is just the inversion flag.
10086 if (!Res)
10087 return DAG.getConstant(IsInverted, DL, ResultVT);
10088 if (IsInverted)
10089 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInversionMask);
10090 return Res;
10091}
10092
10093// Only expand vector types if we have the appropriate vector bit operations.
// Returns true when TLI reports every operation checked below as legal or
// custom for the vector type VT; asserts that VT is a vector.
// NOTE(review): part of the return expression is missing from this listing
// (the embedded numbering skips 10098, 10099 and 10101), so the full list of
// required operations cannot be read from here.
10094static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
10095 assert(VT.isVector() && "Expected vector type");
10096 unsigned Len = VT.getScalarSizeInBits();
10097 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
// MUL is only required when the elements are not exactly 8 bits wide.
10100 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
10102}
10103
// NOTE(review): the line opening this definition is absent from this listing
// (number 10104 is skipped), as are 10165-10166, which hold the condition
// choosing between the multiply and the shift/add sequence near the end.
// Presumably TargetLowering::expandCTPOP -- confirm against upstream.
//
// Expands the population count of Node's operand 0 into shifts, masks and
// adds. Returns an empty SDValue when the element width is above 128 bits or
// not a multiple of 8, or when VT is a vector and canExpandVectorCTPOP()
// rejects it.
10105 SDLoc dl(Node);
10106 EVT VT = Node->getValueType(0);
10107 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10108 SDValue Op = Node->getOperand(0);
10109 unsigned Len = VT.getScalarSizeInBits();
10110 assert(VT.isInteger() && "CTPOP not implemented for this type.");
10111
10112 // TODO: Add support for irregular type lengths.
10113 if (!(Len <= 128 && Len % 8 == 0))
10114 return SDValue();
10115
10116 // Only expand vector types if we have the appropriate vector bit operations.
10117 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
10118 return SDValue();
10119
10120 // This is the "best" algorithm from
10121 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Each mask repeats one byte pattern across the whole element width.
10122 SDValue Mask55 =
10123 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10124 SDValue Mask33 =
10125 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10126 SDValue Mask0F =
10127 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10128
10129 // v = v - ((v >> 1) & 0x55555555...)
10130 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
10131 DAG.getNode(ISD::AND, dl, VT,
10132 DAG.getNode(ISD::SRL, dl, VT, Op,
10133 DAG.getConstant(1, dl, ShVT)),
10134 Mask55));
10135 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10136 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
10137 DAG.getNode(ISD::AND, dl, VT,
10138 DAG.getNode(ISD::SRL, dl, VT, Op,
10139 DAG.getConstant(2, dl, ShVT)),
10140 Mask33));
10141 // v = (v + (v >> 4)) & 0x0F0F0F0F...
10142 Op = DAG.getNode(ISD::AND, dl, VT,
10143 DAG.getNode(ISD::ADD, dl, VT, Op,
10144 DAG.getNode(ISD::SRL, dl, VT, Op,
10145 DAG.getConstant(4, dl, ShVT))),
10146 Mask0F);
10147
// For elements of at most 8 bits the per-byte count is already the result.
10148 if (Len <= 8)
10149 return Op;
10150
10151 // Avoid the multiply if we only have 2 bytes to add.
10152 // TODO: Only doing this for scalars because vectors weren't as obviously
10153 // improved.
10154 if (Len == 16 && !VT.isVector()) {
10155 // v = (v + (v >> 8)) & 0x00FF;
10156 return DAG.getNode(ISD::AND, dl, VT,
10157 DAG.getNode(ISD::ADD, dl, VT, Op,
10158 DAG.getNode(ISD::SRL, dl, VT, Op,
10159 DAG.getConstant(8, dl, ShVT))),
10160 DAG.getConstant(0xFF, dl, VT));
10161 }
10162
10163 // v = (v * 0x01010101...) >> (Len - 8)
10164 SDValue V;
10167 SDValue Mask01 =
10168 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10169 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
10170 } else {
// Multiply-free alternative: repeated v += v << Shift with Shift doubling
// from 8 until it reaches Len.
10171 V = Op;
10172 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10173 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10174 V = DAG.getNode(ISD::ADD, dl, VT, V,
10175 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
10176 }
10177 }
// The byte sums end up in the top byte; shift it down to produce the count.
10178 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
10179}
10180
// NOTE(review): the line opening this definition is absent from this listing
// (number 10181 is skipped), as is 10232, the start of the condition that
// selects the VP_MUL path. Presumably TargetLowering::expandVPCTPOP --
// confirm against upstream.
//
// Vector-predicated population count: the same bit-parallel sequence as
// expandCTPOP, with every node built as a VP_* operation that also takes
// Node's operand 1 (Mask) and operand 2 (VL). Returns an empty SDValue when
// the element width is above 128 bits or not a multiple of 8.
10182 SDLoc dl(Node);
10183 EVT VT = Node->getValueType(0);
10184 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10185 SDValue Op = Node->getOperand(0);
10186 SDValue Mask = Node->getOperand(1);
10187 SDValue VL = Node->getOperand(2);
10188 unsigned Len = VT.getScalarSizeInBits();
10189 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
10190
10191 // TODO: Add support for irregular type lengths.
10192 if (!(Len <= 128 && Len % 8 == 0))
10193 return SDValue();
10194
10195 // This is the same algorithm as expandCTPOP, taken from
10196 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
10197 SDValue Mask55 =
10198 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
10199 SDValue Mask33 =
10200 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
10201 SDValue Mask0F =
10202 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
10203
10204 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
10205
10206 // v = v - ((v >> 1) & 0x55555555...)
10207 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
10208 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10209 DAG.getConstant(1, dl, ShVT), Mask, VL),
10210 Mask55, Mask, VL);
10211 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
10212
10213 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
10214 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
10215 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
10216 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
10217 DAG.getConstant(2, dl, ShVT), Mask, VL),
10218 Mask33, Mask, VL);
10219 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
10220
10221 // v = (v + (v >> 4)) & 0x0F0F0F0F...
// NOTE(review): the Tmp4 assignment below ends in ',' rather than ';', so it
// is chained to the Tmp5 assignment by the comma operator. Evaluation order
// is unchanged, but ';' was presumably intended.
10222 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
10223 Mask, VL),
10224 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
10225 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
10226
// For elements of at most 8 bits the per-byte count is already the result.
10227 if (Len <= 8)
10228 return Op;
10229
10230 // v = (v * 0x01010101...) >> (Len - 8)
10231 SDValue V;
10233 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
10234 SDValue Mask01 =
10235 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
10236 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
10237 } else {
// Multiply-free alternative: repeated v += v << Shift with Shift doubling
// from 8 until it reaches Len.
10238 V = Op;
10239 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
10240 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
10241 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
10242 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
10243 Mask, VL);
10244 }
10245 }
10246 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
10247 Mask, VL);
10248}
10249
// NOTE(review): the line opening this definition is absent from this listing
// (number 10250 is skipped), and so are the condition lines 10259, 10263,
// 10276, 10278 and 10279. Presumably TargetLowering::expandCTLZ -- confirm
// against upstream.
//
// Expands a count-leading-zeros node. Depending on the (partly missing)
// legality checks it re-emits the node as CTLZ, wraps CTLZ_ZERO_POISON in a
// select that handles a zero input, or falls back to a bit-smearing sequence
// followed by CTPOP. Returns an empty SDValue when the vector checks fail.
10251 SDLoc dl(Node);
10252 EVT VT = Node->getValueType(0);
10253 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10254 SDValue Op = Node->getOperand(0);
10255 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10256
10257 // If the non-ZERO_POISON version is supported we can use that instead.
10258 if (Node->getOpcode() == ISD::CTLZ_ZERO_POISON &&
10260 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
10261
10262 // If the ZERO_POISON version is supported use that and handle the zero case.
10264 EVT SetCCVT =
10265 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10266 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Op);
10267 SDValue Zero = DAG.getConstant(0, dl, VT);
10268 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// A zero input selects the element bit width instead of the CTLZ result.
10269 return DAG.getSelect(dl, VT, SrcIsZero,
10270 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
10271 }
10272
10273 // Only expand vector types if we have the appropriate vector bit operations.
10274 // This includes the operations needed to expand CTPOP if it isn't supported.
10275 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10277 !canExpandVectorCTPOP(*this, VT)) ||
10280 return SDValue();
10281
10282 // for now, we do this:
10283 // x = x | (x >> 1);
10284 // x = x | (x >> 2);
10285 // ...
10286 // x = x | (x >>16);
10287 // x = x | (x >>32); // for 64-bit input
10288 // return popcount(~x);
10289 //
10290 // Ref: "Hacker's Delight" by Henry Warren
// Shift amounts are 1, 2, 4, ... for as long as they stay below the element
// width.
10291 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10292 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10293 Op = DAG.getNode(ISD::OR, dl, VT, Op,
10294 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
10295 }
10296 Op = DAG.getNOT(dl, Op, VT);
10297 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
10298}
10299
// NOTE(review): the line opening this definition is absent from this listing
// (number 10300 is skipped). Presumably TargetLowering::expandVPCTLZ --
// confirm against upstream.
//
// Vector-predicated count-leading-zeros: smears the highest set bit into all
// lower positions, inverts the value and counts the set bits. Every node is a
// VP_* operation taking Node's operand 1 (Mask) and operand 2 (VL).
10301 SDLoc dl(Node);
10302 EVT VT = Node->getValueType(0);
10303 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
10304 SDValue Op = Node->getOperand(0);
10305 SDValue Mask = Node->getOperand(1);
10306 SDValue VL = Node->getOperand(2);
10307 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10308
10309 // do this:
10310 // x = x | (x >> 1);
10311 // x = x | (x >> 2);
10312 // ...
10313 // x = x | (x >>16);
10314 // x = x | (x >>32); // for 64-bit input
10315 // return popcount(~x);
// Shift amounts are 1, 2, 4, ... for as long as they stay below the element
// width.
10316 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
10317 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
10318 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
10319 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
10320 VL);
10321 }
// ~x is formed as a VP_XOR with all-ones.
10322 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
10323 Mask, VL);
10324 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
10325}
10326
// NOTE(review): the line opening this definition is absent from this listing
// (number 10327 is skipped). Presumably TargetLowering::expandCTLS -- confirm
// against upstream.
//
// Rewrites Node's operand 0 so that a leading-zero count of the rewritten
// value gives the result (formula below).
10328 SDLoc dl(Node);
10329 EVT VT = Node->getValueType(0);
// The operand is frozen before being used by both the SRA and the XOR below
// (presumably so both uses observe the same value -- TODO confirm).
10330 SDValue Op = DAG.getFreeze(Node->getOperand(0));
10331 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10332
10333 // CTLS(x) = CTLZ(OR(SHL(XOR(x, SRA(x, BW-1)), 1), 1))
10334 // This transforms the sign bits into leading zeros that can be counted.
10335 SDValue ShiftAmt = DAG.getShiftAmountConstant(NumBitsPerElt - 1, VT, dl);
10336 SDValue SignBit = DAG.getNode(ISD::SRA, dl, VT, Op, ShiftAmt);
10337 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, SignBit);
10338 SDValue Shl =
10339 DAG.getNode(ISD::SHL, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
// The OR with 1 makes the value non-zero, which matches the use of the
// ZERO_POISON form of CTLZ on the next line.
10340 SDValue Or = DAG.getNode(ISD::OR, dl, VT, Shl, DAG.getConstant(1, dl, VT));
10341 return DAG.getNode(ISD::CTLZ_ZERO_POISON, dl, VT, Or);
10342}
10343
// NOTE(review): the line opening this definition is absent from this listing
// (number 10344 is skipped), as are 10351, 10357, 10365 and 10367; among
// other things the guard before the second early return, the initializer of
// PtrInfo and the declaration of Table are not visible. Presumably
// TargetLowering::CTTZTableLookup -- confirm against upstream.
//
// Computes a trailing-zero count of Op through a De Bruijn multiply and a
// byte-sized lookup table placed in the constant pool. Only bit widths 32 and
// 64 are handled; otherwise an empty SDValue is returned.
10345 const SDLoc &DL, EVT VT, SDValue Op,
10346 unsigned BitWidth) const {
10347 if (BitWidth != 32 && BitWidth != 64)
10348 return SDValue();
10349
10350 const DataLayout &TD = DAG.getDataLayout();
10352 return SDValue();
10353
10354 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
10355 : APInt(64, 0x0218A392CD3D5DBFULL);
10356 MachinePointerInfo PtrInfo =
10358 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
// Op & (0 - Op) keeps only the lowest set bit; multiplying it by the De
// Bruijn constant and shifting right by ShiftAmt yields the table index.
10359 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
10360 SDValue Lookup = DAG.getNode(
10361 ISD::SRL, DL, VT,
10362 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
10363 DAG.getConstant(DeBruijn, DL, VT)),
10364 DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
10366
// Fill the table so that entry (DeBruijn << i) >> ShiftAmt holds i.
10368 for (unsigned i = 0; i < BitWidth; i++) {
10369 APInt Shl = DeBruijn.shl(i);
10370 APInt Lshr = Shl.lshr(ShiftAmt);
10371 Table[Lshr.getZExtValue()] = i;
10372 }
10373
10374 // Create a ConstantArray in Constant Pool
10375 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
10376 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
10377 TD.getPrefTypeAlign(CA->getType()));
// Load one i8 table entry at CPIdx + Lookup and zero-extend it to VT.
10378 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
10379 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
10380 PtrInfo, MVT::i8);
// The ZERO_POISON form needs no special handling of a zero input.
10381 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON)
10382 return ExtLoad;
10383
// Otherwise a zero input selects BitWidth instead of the loaded value.
10384 EVT SetCCVT =
10385 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10386 SDValue Zero = DAG.getConstant(0, DL, VT);
10387 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
10388 return DAG.getSelect(DL, VT, SrcIsZero,
10389 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
10390}
10391
// NOTE(review): the line opening this definition is absent from this listing
// (number 10392 is skipped), and so are the condition lines 10400, 10404,
// 10417, 10418, 10420-10422, 10427, 10428 and 10441. Presumably
// TargetLowering::expandCTTZ -- confirm against upstream.
//
// Expands a count-trailing-zeros node. Depending on the (partly missing)
// legality checks it re-emits the node as CTTZ, wraps CTTZ_ZERO_POISON in a
// select that handles a zero input, uses CTTZTableLookup(), or computes the
// count from ~x & (x - 1) with CTPOP or CTLZ. Returns an empty SDValue when
// the vector checks fail.
10393 SDLoc dl(Node);
10394 EVT VT = Node->getValueType(0);
10395 SDValue Op = Node->getOperand(0);
10396 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
10397
10398 // If the non-ZERO_POISON version is supported we can use that instead.
10399 if (Node->getOpcode() == ISD::CTTZ_ZERO_POISON &&
10401 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
10402
10403 // If the ZERO_POISON version is supported use that and handle the zero case.
10405 EVT SetCCVT =
10406 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10407 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_POISON, dl, VT, Op);
10408 SDValue Zero = DAG.getConstant(0, dl, VT);
10409 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// A zero input selects the element bit width instead of the CTTZ result.
10410 return DAG.getSelect(dl, VT, SrcIsZero,
10411 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
10412 }
10413
10414 // Only expand vector types if we have the appropriate vector bit operations.
10415 // This includes the operations needed to expand CTPOP if it isn't supported.
10416 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
10419 !canExpandVectorCTPOP(*this, VT)) ||
10423 return SDValue();
10424
10425 // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
10426 // to be expanded or converted to a libcall.
10429 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
10430 return V;
10431
10432 // for now, we use: { return popcount(~x & (x - 1)); }
10433 // unless the target has ctlz but not ctpop, in which case we use:
10434 // { return 32 - nlz(~x & (x-1)); }
10435 // Ref: "Hacker's Delight" by Henry Warren
10436 SDValue Tmp = DAG.getNode(
10437 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
10438 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
10439
10440 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
10442 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
10443 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
10444 }
10445
10446 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
10447}
10448
// NOTE(review): the line opening this definition is absent from this listing
// (number 10449 is skipped). Presumably TargetLowering::expandVPCTTZ --
// confirm against upstream.
//
// Vector-predicated count-trailing-zeros, built as VP_CTPOP(~x & (x - 1)).
// Every node takes Node's operand 1 (Mask) and operand 2 (VL).
10450 SDValue Op = Node->getOperand(0);
10451 SDValue Mask = Node->getOperand(1);
10452 SDValue VL = Node->getOperand(2);
10453 SDLoc dl(Node);
10454 EVT VT = Node->getValueType(0);
10455
10456 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
// ~x is formed as a VP_XOR with all-ones.
10457 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
10458 DAG.getAllOnesConstant(dl, VT), Mask, VL);
10459 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
10460 DAG.getConstant(1, dl, VT), Mask, VL);
10461 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
10462 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
10463}
10464
// NOTE(review): the first line of this definition (return type, name and the
// parameter N) is absent from this listing (number 10465 is skipped).
// Presumably TargetLowering::expandVPCTTZElements -- confirm against
// upstream.
//
// Builds a VP_REDUCE_UMIN over a vector that holds the lane index where the
// source (operand 0) compares non-zero and the explicit vector length
// (operand 2) elsewhere; operand 1 is the mask.
10466 SelectionDAG &DAG) const {
10467 // %cond = to_bool_vec %source
10468 // %splat = splat /*val=*/VL
10469 // %tz = step_vector
10470 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
10471 // %r = vp.reduce.umin %v
10472 SDLoc DL(N);
10473 SDValue Source = N->getOperand(0);
10474 SDValue Mask = N->getOperand(1);
10475 SDValue EVL = N->getOperand(2);
10476 EVT SrcVT = Source.getValueType();
10477 EVT ResVT = N->getValueType(0);
// Vector of ResVT elements with the same element count as the source.
10478 EVT ResVecVT =
10479 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
10480
10481 // Convert to boolean vector.
// Only needed when the source elements are not already i1: compare each
// element against zero with SETNE.
10482 if (SrcVT.getScalarType() != MVT::i1) {
10483 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
10484 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
10485 SrcVT.getVectorElementCount());
10486 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
10487 DAG.getCondCode(ISD::SETNE), Mask, EVL);
10488 }
10489
// ExtEVL (EVL converted to ResVT) is both the value placed in lanes whose
// condition is false and the first operand of the reduction.
10490 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
10491 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
10492 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
10493 SDValue Select =
10494 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
10495 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
10496}
10497
10498/// Returns a type-legalized version of \p Mask as the first item in the
10499/// pair. The second item contains a type-legalized step vector that's
10500/// guaranteed to fit the number of elements in \p Mask.
10501/// If the stepvector would require splitting, returns an empty SDValue
10502/// as the second item to signal that the operation should be split instead.
// NOTE(review): the line carrying the function name and its leading
// parameters is absent from this listing (number 10504 is skipped), as is
// 10528, which declares TypeAction.
10503static std::pair<SDValue, SDValue>
10505 SelectionDAG &DAG) {
10506 EVT MaskVT = Mask.getValueType();
10507 EVT BoolVT = MaskVT.getScalarType();
10508
10509 // Find a suitable type for a stepvector.
10510 // If zero is poison, we can assume the upper limit of the result is VF-1.
10511 ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
10512 if (MaskVT.isScalableVector())
10513 VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
10514 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10515 uint64_t EltWidth = TLI.getBitWidthForCttzElements(
10516 EVT(TLI.getVectorIdxTy(DAG.getDataLayout())),
10517 MaskVT.getVectorElementCount(), ZeroIsPoison, &VScaleRange);
10518 // If the step vector element type is smaller than the mask element type,
10519 // use the mask type directly to avoid widening issues.
10520 EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits());
10521 EVT StepVT = MVT::getIntegerVT(EltWidth);
10522 EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT);
10523
10524 // If promotion or widening is required to make the type legal, do it here.
10525 // Promotion of integers within LegalizeVectorOps is looking for types of
10526 // the same size but with a smaller number of larger elements, not the usual
10527 // larger size with the same number of larger elements.
10529 TLI.getTypeAction(*DAG.getContext(), StepVecVT);
10530 SDValue StepVec;
10531 if (TypeAction == TargetLowering::TypePromoteInteger) {
// Promotion: build the step vector directly in the promoted type; the mask
// is returned unchanged.
10532 StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10533 StepVec = DAG.getStepVector(DL, StepVecVT);
10534 } else if (TypeAction == TargetLowering::TypeWidenVector) {
10535 // For widening, the element count changes. Create a step vector with only
10536 // the original elements valid and poison for padding. Also widen the mask.
10537 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
10538 unsigned WideNumElts = WideVecVT.getVectorNumElements();
10539
10540 // Build widened step vector: <0, 1, ..., OrigNumElts-1, poison, poison, ..>
10541 SDValue OrigStepVec = DAG.getStepVector(DL, StepVecVT);
10542 SDValue UndefStep = DAG.getPOISON(WideVecVT);
10543 StepVec = DAG.getInsertSubvector(DL, UndefStep, OrigStepVec, 0);
10544
10545 // Widen mask: pad with zeros.
10546 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), BoolVT, WideNumElts);
10547 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
10548 Mask = DAG.getInsertSubvector(DL, ZeroMask, Mask, 0);
10549 } else if (TypeAction == TargetLowering::TypeSplitVector) {
10550 // The stepvector type would require splitting. Signal to the caller
10551 // that the operation should be split instead of expanded.
10552 return {Mask, SDValue()};
10553 } else {
// Any other type action: use the step vector type as computed above.
10554 StepVec = DAG.getStepVector(DL, StepVecVT);
10555 }
10556
10557 return {Mask, StepVec};
10558}
10559
// NOTE(review): the first line of this definition (return type, name and the
// parameter N) is absent from this listing (number 10560 is skipped), as is
// 10584, where the definition of Cond begins. Presumably
// TargetLowering::expandVectorFindLastActive -- confirm against upstream.
//
// Computes the index of the highest active lane of the mask in N's operand 0:
// inactive lanes of a step vector are zeroed and the maximum of the remaining
// values is taken. When getLegalMaskAndStepVector() reports that the step
// vector would need splitting, the mask is split in two halves and the
// operation is re-emitted on each half instead.
10561 SelectionDAG &DAG) const {
10562 SDLoc DL(N);
10563 auto [Mask, StepVec] = getLegalMaskAndStepVector(
10564 N->getOperand(0), /*ZeroIsPoison=*/true, DL, DAG);
10565
10566 // If StepVec is empty, the stepvector would require splitting.
10567 // Split the operation instead and let it be recursively legalized.
10568 if (!StepVec) {
10569 EVT MaskVT = N->getOperand(0).getValueType();
10570 EVT ResVT = N->getValueType(0);
10571
10572 // Split the mask
10573 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(MaskVT);
10574 auto [MaskLo, MaskHi] = DAG.SplitVector(N->getOperand(0), DL);
10575
10576 // Create split VECTOR_FIND_LAST_ACTIVE operations
10577 SDValue LoResult =
10578 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskLo);
10579 SDValue HiResult =
10580 DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, ResVT, MaskHi);
10581
10582 // Check if any lane is active in the high mask.
10583 SDValue AnyHiActive = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, MaskHi);
10585 AnyHiActive, DL,
10586 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i1),
10587 MVT::i1);
10588
10589 // Adjust HiResult by adding the number of elements in Lo
10590 SDValue LoNumElts =
10591 DAG.getElementCount(DL, ResVT, LoVT.getVectorElementCount());
10592 SDValue AdjustedHiResult =
10593 DAG.getNode(ISD::ADD, DL, ResVT, HiResult, LoNumElts);
10594
10595 // Return: AnyHiActive ? AdjustedHiResult : LoResult;
10596 return DAG.getNode(ISD::SELECT, DL, ResVT, Cond, AdjustedHiResult,
10597 LoResult);
10598 }
10599
10600 EVT StepVecVT = StepVec.getValueType();
10601 EVT StepVT = StepVec.getValueType().getVectorElementType();
10602
10603 // Zero out lanes with inactive elements, then find the highest remaining
10604 // value from the stepvector.
10605 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
10606 SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
10607 SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
// The reduction yields a StepVT scalar; convert it to the node's result type.
10608 return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
10609}
10610
10612 SelectionDAG &DAG) const {
// NOTE(review): the opening line of this signature (return type, function
// name and the declaration of `N`) is not present in this listing.
//
// Expands a loop-dependence mask node N into a GET_ACTIVE_LANE_MASK node.
// Operands read from N: 0 = source address, 1 = sink address, 2 = element
// size in bytes, 3 = constant lane offset. The opcode distinguishes the
// read-after-write form (LOOP_DEPENDENCE_RAW_MASK) from the other form,
// which the comments below call WAR_MASK.
10613 SDLoc DL(N);
10614 EVT VT = N->getValueType(0);
10615 SDValue SourceValue = N->getOperand(0);
10616 SDValue SinkValue = N->getOperand(1);
10617 SDValue EltSizeInBytes = N->getOperand(2);
10618
10619 // Note: The lane offset is scalable if the mask is scalable.
10620 ElementCount LaneOffsetEC =
10621 ElementCount::get(N->getConstantOperandVal(3), VT.isScalableVT());
10622
10623 EVT AddrVT = SourceValue->getValueType(0);
10624 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
10625
10626 // Take the difference between the pointers and divide it by the element
10627 // size, to see how many lanes separate them.
10628 SDValue Diff = DAG.getNode(ISD::SUB, DL, AddrVT, SinkValue, SourceValue);
// For the read-after-write form only the distance matters, not its sign.
10629 if (IsReadAfterWrite)
10630 Diff = DAG.getNode(ISD::ABS, DL, AddrVT, Diff);
10631 Diff = DAG.getNode(ISD::SDIV, DL, AddrVT, Diff, EltSizeInBytes);
10632
10633 // The pointers do not alias if:
10634 // * Diff <= 0 (WAR_MASK)
10635 // * Diff == 0 (RAW_MASK)
10636 EVT CmpVT =
10637 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), AddrVT);
10638 SDValue Zero = DAG.getConstant(0, DL, AddrVT);
10639 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
10640 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
10641
10642 // The pointers do not alias if:
10643 // Lane + LaneOffset < Diff (WAR/RAW_MASK)
10644 SDValue LaneOffset = DAG.getElementCount(DL, AddrVT, LaneOffsetEC);
// MaskN is the upper bound handed to GET_ACTIVE_LANE_MASK: Diff in the normal
// case, or a replacement value when Cmp says the pointers cannot alias.
10645 SDValue MaskN = DAG.getSelect(
10646 DL, AddrVT, Cmp,
// NOTE(review): the first line of the select's true-value operand (listing
// line 10647) is missing here; only its trailing `AddrVT)` argument is shown.
// Presumably it is a large constant that enables every lane - confirm against
// the original file.
10648 AddrVT),
10649 Diff);
10650
10651 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, VT, LaneOffset, MaskN);
10652}
10653
10655 bool IsNegative) const {
// NOTE(review): the opening line of this signature (return type, function
// name and the leading parameters, including `N` and `DAG`) is not present in
// this listing. Several condition lines inside the body are missing as well;
// each gap is marked below.
//
// Expands an integer absolute-value node N (operand 0 is the input). When
// IsNegative is true the expansion yields 0 - abs(x) instead of abs(x).
// Min/max based forms are tried first; the final fallback is the
// shift/xor/sub sequence. Returns an empty SDValue when the vector guard
// near the end rejects the type.
10656 SDLoc dl(N);
10657 EVT VT = N->getValueType(0);
10658 SDValue Op = N->getOperand(0);
10659
10660 // If expanding ABS_MIN_POISON, fall back to ABS if the target supports it.
10661 if (N->getOpcode() == ISD::ABS_MIN_POISON &&
// NOTE(review): the remainder of this condition (listing line 10662) is
// missing; per the comment above it tests target support for ABS.
10663 SDValue AbsVal = DAG.getNode(ISD::ABS, dl, VT, Op);
10664 if (IsNegative)
10665 return DAG.getNegative(AbsVal, dl, VT);
10666 return AbsVal;
10667 }
10668
10669 // abs(x) -> smax(x,sub(0,x))
10670 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): the remainder of this condition (listing line 10671) is
// missing; presumably it checks that SMAX is available - confirm.
10672 SDValue Zero = DAG.getConstant(0, dl, VT);
// Op is frozen before being used twice in the expression below, presumably so
// both uses observe the same value.
10673 Op = DAG.getFreeze(Op);
10674 return DAG.getNode(ISD::SMAX, dl, VT, Op,
10675 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10676 }
10677
10678 // abs(x) -> umin(x,sub(0,x))
10679 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): the remainder of this condition (listing line 10680) is
// missing; presumably it checks that UMIN is available - confirm.
10681 SDValue Zero = DAG.getConstant(0, dl, VT);
10682 Op = DAG.getFreeze(Op);
10683 return DAG.getNode(ISD::UMIN, dl, VT, Op,
10684 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10685 }
10686
10687 // 0 - abs(x) -> smin(x, sub(0,x))
10688 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
// NOTE(review): the remainder of this condition (listing line 10689) is
// missing; presumably it checks that SMIN is available - confirm.
10690 SDValue Zero = DAG.getConstant(0, dl, VT);
10691 Op = DAG.getFreeze(Op);
10692 return DAG.getNode(ISD::SMIN, dl, VT, Op,
10693 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
10694 }
10695
10696 // Only expand vector types if we have the appropriate vector operations.
10697 if (VT.isVector() &&
// NOTE(review): the first and last terms of this disjunction (listing lines
// 10698 and 10701) are missing. Also note that the non-negated result below is
// built with ISD::SUB, while the visible guard tests ISD::ADD for that case -
// confirm this is intended.
10699 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
10700 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
10702 return SDValue();
10703
// Generic fallback. Shift is an arithmetic shift by (bit width - 1), i.e. a
// replication of the sign bit across the whole value.
10704 Op = DAG.getFreeze(Op);
10705 SDValue Shift = DAG.getNode(
10706 ISD::SRA, dl, VT, Op,
10707 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10708 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
10709
10710 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
10711 if (!IsNegative)
10712 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
10713
10714 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
10715 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
10716}
10717
// NOTE(review): the signature line of this function (listing line 10718) is
// not present in this listing; the body below uses `N` and `DAG` from it.
//
// Expands an absolute-difference node N (ABDS when signed, otherwise the
// unsigned form) of operands 0 and 1. Strategies are tried in this order:
//  1. max/min subtraction when both are legal;
//  2. unsigned only: OR of the two saturating subtractions;
//  3. abs(sub) when the subtraction is known not to overflow;
//  4. branchless sub/xor/sub using an all-ones compare result;
//  5. unsigned illegal scalar only: the same trick with USUBO's overflow bit;
//  6. unrolling (guard line missing, see below);
//  7. select between the two subtraction orders.
10719 SDLoc dl(N);
10720 EVT VT = N->getValueType(0);
10721 SDValue LHS = N->getOperand(0);
10722 SDValue RHS = N->getOperand(1);
10723 bool IsSigned = N->getOpcode() == ISD::ABDS;
10724
10725 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
10726 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
10727 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
10728 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
10729 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
// Both operands are used twice, so each is frozen first.
10730 LHS = DAG.getFreeze(LHS);
10731 RHS = DAG.getFreeze(RHS);
10732 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
10733 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
10734 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
10735 }
10736
10737 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
10738 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT)) {
10739 LHS = DAG.getFreeze(LHS);
10740 RHS = DAG.getFreeze(RHS);
10741 return DAG.getNode(ISD::OR, dl, VT,
10742 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
10743 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
10744 }
10745
10746 // If the subtract doesn't overflow then just use abs(sub())
// When both sign bits are known zero, the unsigned form is checked with the
// signed overflow query as well.
10747 bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
10748
10749 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
10750 return DAG.getNode(ISD::ABS, dl, VT,
10751 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
10752
10753 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
10754 return DAG.getNode(ISD::ABS, dl, VT,
10755 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10756
10757 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// NOTE(review): the line defining `CC` (listing line 10758) is missing here.
// Per the comments below it is the signed or unsigned greater-than condition.
10759 LHS = DAG.getFreeze(LHS);
10760 RHS = DAG.getFreeze(RHS);
10761 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
10762
10763 // Branchless expansion iff cmp result is allbits:
10764 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
10765 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
10766 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10767 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
10768 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
10769 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
10770 }
10771
10772 // Similar to the branchless expansion, if we don't prefer selects, use the
10773 // (sign-extended) usubo overflow flag if the (scalar) type is illegal as this
10774 // is more likely to legalize cleanly: abdu(lhs, rhs) -> sub(xor(sub(lhs,
10775 // rhs), uof(lhs, rhs)), uof(lhs, rhs))
10776 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT) &&
// NOTE(review): the remainder of this condition (listing line 10777) is
// missing; per the comment above it concerns whether selects are preferred.
10778 SDValue USubO =
10779 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
// This `Cmp` shadows the outer one: it is the overflow bit sign-extended to VT.
10780 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
10781 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
10782 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
10783 }
10784
10785 // FIXME: Should really try to split the vector in case it's legal on a
10786 // subvector.
// NOTE(review): the condition guarding the next return (listing line 10787) is
// missing here. It must be conditional, since code follows it; per the FIXME
// it presumably tests for a vector type.
10788 return DAG.UnrollVectorOp(N);
10789
10790 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10791 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10792 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
10793 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
10794}
10795
// NOTE(review): the signature line of this function (listing line 10796) is
// not present in this listing; the body below uses `N` and `DAG` from it.
//
// Expands an averaging node N (AVGFLOORS, AVGFLOORU, AVGCEILS or AVGCEILU) of
// operands 0 and 1. The opcode fixes two properties: floor vs. ceil rounding
// and signed vs. unsigned. Strategies, in order:
//  1. plain add (+1 for ceil) and shift when the operands are known to have a
//     spare top bit;
//  2. scalars: do the add in a wider legal type and truncate;
//  3. AVGFLOORU on an illegal scalar: UADDO, with the carry moved to the MSB;
//  4. the overflow-free and/or + shifted-xor identity.
10797 SDLoc dl(N);
10798 EVT VT = N->getValueType(0);
10799 SDValue LHS = N->getOperand(0);
10800 SDValue RHS = N->getOperand(1);
10801
10802 unsigned Opc = N->getOpcode();
10803 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
10804 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
// Opcode choices for strategy 4 (see the formulas near the end).
10805 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
10806 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
10807 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
10808 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// NOTE(review): the opening of this assert (listing line 10809) is missing;
// only the tail of its opcode check and its message are shown.
10810 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
10811 "Unknown AVG node");
10812
10813 // If the operands are already extended, we can add+shift.
// "Extended" here means at least two sign bits (signed) or at least one known
// leading zero (unsigned) on both operands.
10814 bool IsExt =
10815 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
10816 DAG.ComputeNumSignBits(RHS) >= 2) ||
10817 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
10818 DAG.computeKnownBits(RHS).countMinLeadingZeros() >= 1);
10819 if (IsExt) {
10820 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
10821 if (!IsFloor)
10822 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
10823 return DAG.getNode(ShiftOpc, dl, VT, Sum,
10824 DAG.getShiftAmountConstant(1, VT, dl));
10825 }
10826
10827 // For scalars, see if we can efficiently extend/truncate to use add+shift.
10828 if (VT.isScalarInteger()) {
10829 EVT ExtVT = VT.widenIntegerElementType(*DAG.getContext());
10830 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
10831 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
10832 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
10833 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
10834 if (!IsFloor)
10835 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
10836 DAG.getConstant(1, dl, ExtVT));
10837 // Just use SRL as we will be truncating away the extended sign bits.
10838 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
10839 DAG.getShiftAmountConstant(1, ExtVT, dl));
10840 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
10841 }
10842 }
10843
10844 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
10845 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT) &&
// NOTE(review): the remainder of this condition (listing lines 10846-10847) is
// missing here.
10848 SDValue UAddWithOverflow =
10849 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
10850
10851 SDValue Sum = UAddWithOverflow.getValue(0);
10852 SDValue Overflow = UAddWithOverflow.getValue(1);
10853
10854 // Right shift the sum by 1
10855 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum,
10856 DAG.getShiftAmountConstant(1, VT, dl));
10857
// Despite the variable name this is an ANY_EXTEND: the value is shifted left
// by (bit width - 1) next, so only bit 0 of the extended flag survives.
10858 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
10859 SDValue OverflowShl = DAG.getNode(
10860 ISD::SHL, dl, VT, ZeroExtOverflow,
10861 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
10862
10863 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
10864 }
10865
10866 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
10867 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
10868 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
10869 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
// Each operand is used twice below, so both are frozen first.
10870 LHS = DAG.getFreeze(LHS);
10871 RHS = DAG.getFreeze(RHS);
10872 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
10873 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10874 SDValue Shift =
10875 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
10876 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
10877}
10878
// NOTE(review): the signature line of this function (listing line 10879) is
// not present in this listing; the body below uses `N` and `DAG` from it.
//
// Expands a byte-swap of N's operand 0 into shifts, masks and ORs (or
// rotates). Only scalar element types i16, i32 and i64 are handled; a
// non-simple VT or any other element type returns an empty SDValue.
10880 SDLoc dl(N);
10881 EVT VT = N->getValueType(0);
10882 SDValue Op = N->getOperand(0);
10883
10884 if (!VT.isSimple())
10885 return SDValue();
10886
10887 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10888 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10889 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10890 default:
10891 return SDValue();
10892 case MVT::i16:
10893 // Use a rotate by 8. This can be further expanded if necessary.
10894 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10895 case MVT::i32:
10896 // This is meant for ARM specifically, which has ROTR but no ROTL.
10897 // t = x ^ rotr(x, 16)
10898 // t = bic(t, 0x00ff0000)
10899 // t = lshr(t, 8)
10900 // x = t ^ rotr(x, 8)
// NOTE(review): the line that opens this braced sequence (listing line 10901)
// is missing; the `}` after the XOR return closes it. It must be a condition,
// since the generic i32 sequence follows; per the comment above it presumably
// tests for ROTR without ROTL - confirm.
10902 SDValue Rotr16 =
10903 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(16, dl, SHVT));
10904 SDValue Tmp = DAG.getNode(ISD::XOR, dl, VT, Op, Rotr16);
// AND with 0xFF00FFFF is the "bic 0x00ff0000" step from the recipe above.
10905 Tmp = DAG.getNode(ISD::AND, dl, VT, Tmp,
10906 DAG.getConstant(0xFF00FFFF, dl, VT));
10907 Tmp = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(8, dl, SHVT));
10908 SDValue Rotr8 =
10909 DAG.getNode(ISD::ROTR, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10910 return DAG.getNode(ISD::XOR, dl, VT, Tmp, Rotr8);
10911 }
// Generic i32 path: TmpK ends up holding source byte K-1 (0 = least
// significant) moved to its mirrored position; the four are then ORed.
10912 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10913 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
10914 DAG.getConstant(0xFF00, dl, VT));
10915 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
10916 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10917 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
10918 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10919 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10920 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10921 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10922 case MVT::i64:
// Same scheme with eight bytes. The low four bytes are masked and shifted
// left, the high four are shifted right and masked, then all are ORed
// pairwise.
10923 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10924 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
10925 DAG.getConstant(255ULL<<8, dl, VT));
10926 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
10927 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
10928 DAG.getConstant(255ULL<<16, dl, VT));
10929 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
10930 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
10931 DAG.getConstant(255ULL<<24, dl, VT));
10932 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
10933 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
10934 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
10935 DAG.getConstant(255ULL<<24, dl, VT));
10936 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
10937 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
10938 DAG.getConstant(255ULL<<16, dl, VT));
10939 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
10940 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
10941 DAG.getConstant(255ULL<<8, dl, VT));
10942 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
10943 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
10944 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
10945 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
10946 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
10947 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
10948 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
10949 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
10950 }
10951}
10952
// NOTE(review): the signature line of this function (listing line 10953) is
// not present in this listing; the body below uses `N` and `DAG` from it.
//
// Vector-predicated byte swap: the same shift/mask/OR scheme as the
// unpredicated expansion, but every node is a VP_* opcode carrying N's mask
// (operand 1) and explicit vector length (operand 2). Only scalar element
// types i16, i32 and i64 are handled; a non-simple VT or any other element
// type returns an empty SDValue.
10954 SDLoc dl(N);
10955 EVT VT = N->getValueType(0);
10956 SDValue Op = N->getOperand(0);
10957 SDValue Mask = N->getOperand(1);
10958 SDValue EVL = N->getOperand(2);
10959
10960 if (!VT.isSimple())
10961 return SDValue();
10962
10963 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
10964 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
10965 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
10966 default:
10967 return SDValue();
10968 case MVT::i16:
// Two bytes: (x << 8) | (x >> 8).
10969 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10970 Mask, EVL);
10971 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10972 Mask, EVL);
10973 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
10974 case MVT::i32:
// TmpK holds source byte K-1 (0 = least significant) moved to its mirrored
// position; the four pieces are ORed together.
10975 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10976 Mask, EVL);
10977 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
10978 Mask, EVL);
10979 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
10980 Mask, EVL);
10981 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
10982 Mask, EVL);
10983 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
10984 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
10985 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
10986 Mask, EVL);
10987 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
10988 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
10989 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
10990 case MVT::i64:
// Eight bytes: the low four are masked then shifted left, the high four are
// shifted right then masked, and the pieces are ORed pairwise.
10991 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
10992 Mask, EVL);
10993 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10994 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
10995 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
10996 Mask, EVL);
10997 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
10998 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
10999 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
11000 Mask, EVL);
11001 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
11002 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
11003 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
11004 Mask, EVL);
11005 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
11006 Mask, EVL);
11007 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
11008 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
11009 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
11010 Mask, EVL);
11011 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
11012 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
11013 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
11014 Mask, EVL);
11015 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11016 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
11017 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
11018 Mask, EVL);
11019 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
11020 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
11021 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
11022 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
11023 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
11024 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
11025 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
11026 }
11027}
11028
// NOTE(review): the signature line of this function (listing line 11029) is
// not present in this listing; the body below uses `N` and `DAG` from it.
//
// Expands a bit reversal of N's operand 0.
//  * Element width a power of two and at least 8: byte-swap first (skipped
//    for 8-bit elements), then swap nibbles, bit pairs and single bits within
//    each byte using splatted masks.
//  * Any other width: move each bit to its mirrored position one at a time
//    and OR the results together.
// Unlike the VP variant, this function always returns a value.
11030 SDLoc dl(N);
11031 EVT VT = N->getValueType(0);
11032 SDValue Op = N->getOperand(0);
11033 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11034 unsigned Sz = VT.getScalarSizeInBits();
11035
11036 SDValue Tmp, Tmp2, Tmp3;
11037
11038 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11039 // and finally the i1 pairs.
11040 // TODO: We can easily support i4/i2 legal types if any target ever does.
11041 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11042 // Create the masks - repeating the pattern every byte.
11043 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11044 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11045 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11046
11047 // BSWAP if the type is wider than a single byte.
11048 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
11049
11050 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11051 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
11052 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
11053 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
11054 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
11055 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11056
11057 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11058 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
11059 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
11060 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
11061 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
11062 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11063
11064 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11065 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
11066 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
11067 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
11068 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
11069 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
11070 return Tmp;
11071 }
11072
// Bit-by-bit fallback: source bit I is moved to destination bit J = Sz-1-I.
// The shift direction depends on which side of the middle I lies; the result
// is masked to bit J alone and accumulated into Tmp. J wraps around on the
// last decrement but is not read after that.
11073 Tmp = DAG.getConstant(0, dl, VT);
11074 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
11075 if (I < J)
11076 Tmp2 =
11077 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
11078 else
11079 Tmp2 =
11080 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
11081
// `Shift` is a single-bit mask selecting destination bit J, not a shift amount.
11082 APInt Shift = APInt::getOneBitSet(Sz, J);
11083 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
11084 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
11085 }
11086
11087 return Tmp;
11088}
11089
// NOTE(review): the signature line of this function (listing line 11090) is
// not present in this listing; the body below uses `N` and `DAG` from it.
//
// Vector-predicated bit reversal. N must be a VP_BITREVERSE node; operand 1
// is the mask and operand 2 the explicit vector length, both forwarded to
// every VP_* node created here. Only element widths that are a power of two
// and at least 8 bits are expanded; for any other width an empty SDValue is
// returned (there is no bit-by-bit fallback in this variant).
11091 assert(N->getOpcode() == ISD::VP_BITREVERSE);
11092
11093 SDLoc dl(N);
11094 EVT VT = N->getValueType(0);
11095 SDValue Op = N->getOperand(0);
11096 SDValue Mask = N->getOperand(1);
11097 SDValue EVL = N->getOperand(2);
11098 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
11099 unsigned Sz = VT.getScalarSizeInBits();
11100
11101 SDValue Tmp, Tmp2, Tmp3;
11102
11103 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
11104 // and finally the i1 pairs.
11105 // TODO: We can easily support i4/i2 legal types if any target ever does.
11106 if (Sz >= 8 && isPowerOf2_32(Sz)) {
11107 // Create the masks - repeating the pattern every byte.
11108 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
11109 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
11110 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
11111
11112 // BSWAP if the type is wider than a single byte.
11113 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
11114
11115 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
11116 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
11117 Mask, EVL);
11118 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11119 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
11120 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
11121 Mask, EVL);
11122 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
11123 Mask, EVL);
11124 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11125
11126 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
11127 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
11128 Mask, EVL);
11129 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11130 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
11131 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
11132 Mask, EVL);
11133 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
11134 Mask, EVL);
11135 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11136
11137 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
11138 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
11139 Mask, EVL);
11140 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
11141 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
11142 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
11143 Mask, EVL);
11144 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
11145 Mask, EVL);
11146 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
11147 return Tmp;
11148 }
// Unsupported element width: signal "not expanded" to the caller.
11149 return SDValue();
11150}
11151
11152std::pair<SDValue, SDValue>
11154 SelectionDAG &DAG) const {
// NOTE(review): the middle line of this signature (listing line 11153, with
// the function name and the declaration of `LD`) is not present in this
// listing.
//
// Replaces the fixed-length vector load LD by scalar operations and returns
// {loaded vector value, output chain}. Scalable vectors are rejected with a
// fatal error.
//  * Elements that are not byte-sized: the whole vector is loaded as one
//    integer and each element is extracted with shift + mask + truncate.
//  * Byte-sized elements: one scalar (extending) load per element, with the
//    per-load chains merged by a TokenFactor.
11155 SDLoc SL(LD);
11156 SDValue Chain = LD->getChain();
11157 SDValue BasePTR = LD->getBasePtr();
11158 EVT SrcVT = LD->getMemoryVT();
11159 EVT DstVT = LD->getValueType(0);
11160 ISD::LoadExtType ExtType = LD->getExtensionType();
11161
11162 if (SrcVT.isScalableVector())
11163 report_fatal_error("Cannot scalarize scalable vector loads");
11164
11165 unsigned NumElem = SrcVT.getVectorNumElements();
11166
11167 EVT SrcEltVT = SrcVT.getScalarType();
11168 EVT DstEltVT = DstVT.getScalarType();
11169
11170 // A vector must always be stored in memory as-is, i.e. without any padding
11171 // between the elements, since various code depends on it, e.g. in the
11172 // handling of a bitcast of a vector type to int, which may be done with a
11173 // vector store followed by an integer load. A vector that does not have
11174 // elements that are byte-sized must therefore be stored as an integer
11175 // built out of the extracted vector elements.
11176 if (!SrcEltVT.isByteSized()) {
// LoadVT covers the vector's store size; SrcIntVT covers exactly its bits.
11177 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
11178 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
11179
11180 unsigned NumSrcBits = SrcVT.getSizeInBits();
11181 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
11182
11183 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
11184 SDValue SrcEltBitMask = DAG.getConstant(
11185 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
11186
11187 // Load the whole vector and avoid masking off the top bits as it makes
11188 // the codegen worse.
11189 SDValue Load =
11190 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
11191 LD->getPointerInfo(), SrcIntVT, LD->getBaseAlign(),
11192 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11193
// NOTE(review): the declaration of `Vals` (listing line 11194) is missing
// here; it is the container the loop below appends to.
11195 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// On big-endian targets element 0 sits in the most significant position.
11196 unsigned ShiftIntoIdx =
11197 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
11198 SDValue ShiftAmount = DAG.getShiftAmountConstant(
11199 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
11200 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
11201 SDValue Elt =
11202 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
11203 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
11204
// For an extending load, widen each element to the destination element type.
11205 if (ExtType != ISD::NON_EXTLOAD) {
11206 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
11207 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
11208 }
11209
11210 Vals.push_back(Scalar);
11211 }
11212
11213 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11214 return std::make_pair(Value, Load.getValue(1));
11215 }
11216
// Byte-sized elements from here on (the assert restates that).
11217 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
11218 assert(SrcEltVT.isByteSized());
11219
// NOTE(review): the declaration of `Vals` for this path (listing line 11220)
// is missing here.
11221 SmallVector<SDValue, 8> LoadChains;
11222
11223 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Every scalar load hangs off the original input chain.
11224 SDValue ScalarLoad = DAG.getExtLoad(
11225 ExtType, SL, DstEltVT, Chain, BasePTR,
11226 LD->getPointerInfo().getWithOffset(Idx * Stride), SrcEltVT,
11227 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11228
// BasePTR is advanced in place, so it always addresses the next element.
11229 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
11230
11231 Vals.push_back(ScalarLoad.getValue(0));
11232 LoadChains.push_back(ScalarLoad.getValue(1));
11233 }
11234
11235 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
11236 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
11237
11238 return std::make_pair(Value, NewChain);
11239}
11240
11242 SelectionDAG &DAG) const {
// NOTE(review): the opening line of this signature (listing line 11241, with
// the return type, function name and the declaration of `ST`) is not present
// in this listing.
//
// Replaces the fixed-length vector store ST by scalar operations and returns
// the resulting chain. Scalable vectors are rejected with a fatal error.
//  * Elements that are not byte-sized: the elements are packed into one
//    integer, which is stored with a single store.
//  * Byte-sized elements: one truncating scalar store per element, merged by
//    a TokenFactor.
11243 SDLoc SL(ST);
11244
11245 SDValue Chain = ST->getChain();
11246 SDValue BasePtr = ST->getBasePtr();
11247 SDValue Value = ST->getValue();
11248 EVT StVT = ST->getMemoryVT();
11249
11250 if (StVT.isScalableVector())
11251 report_fatal_error("Cannot scalarize scalable vector stores");
11252
11253 // The type of the data we want to save
11254 EVT RegVT = Value.getValueType();
11255 EVT RegSclVT = RegVT.getScalarType();
11256
11257 // The type of data as saved in memory.
11258 EVT MemSclVT = StVT.getScalarType();
11259
11260 unsigned NumElem = StVT.getVectorNumElements();
11261
11262 // A vector must always be stored in memory as-is, i.e. without any padding
11263 // between the elements, since various code depends on it, e.g. in the
11264 // handling of a bitcast of a vector type to int, which may be done with a
11265 // vector store followed by an integer load. A vector that does not have
11266 // elements that are byte-sized must therefore be stored as an integer
11267 // built out of the extracted vector elements.
11268 if (!MemSclVT.isByteSized()) {
11269 unsigned NumBits = StVT.getSizeInBits();
11270 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
11271
// CurrVal accumulates the packed integer, starting from zero.
11272 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
11273
11274 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Truncate to the in-memory element type, then zero-extend so that no stray
// high bits are ORed into neighbouring elements.
11275 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11276 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
11277 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
// On big-endian targets element 0 goes to the most significant position.
11278 unsigned ShiftIntoIdx =
11279 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
// NOTE(review): the shift amount is built as an IntVT constant here, whereas
// the matching load-side code uses getShiftAmountConstant - confirm whether
// the difference is intentional.
11280 SDValue ShiftAmount =
11281 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
11282 SDValue ShiftedElt =
11283 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
11284 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
11285 }
11286
11287 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
11288 ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11289 ST->getAAInfo());
11290 }
11291
11292 // Store Stride in bytes
11293 unsigned Stride = MemSclVT.getSizeInBits() / 8;
11294 assert(Stride && "Zero stride!");
11295 // Extract each of the elements from the original vector and save them into
11296 // memory individually.
// NOTE(review): the declaration of `Stores` (listing line 11297) is missing
// here; it is the container the loop below appends to.
11298 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
11299 SDValue Elt = DAG.getExtractVectorElt(SL, RegSclVT, Value, Idx);
11300
// Unlike the load-side loop, the address is recomputed from the unchanged
// base pointer on every iteration.
11301 SDValue Ptr =
11302 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
11303
11304 // This scalar TruncStore may be illegal, but we legalize it later.
11305 SDValue Store = DAG.getTruncStore(
11306 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
11307 MemSclVT, ST->getBaseAlign(), ST->getMemOperand()->getFlags(),
11308 ST->getAAInfo());
11309
11310 Stores.push_back(Store);
11311 }
11312
11313 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
11314}
11315
// Expands an unaligned load (only ISD::UNINDEXED addressing is handled, per the
// assert) and returns the pair {loaded value, output chain}.
//
// Three strategies are used, depending on the loaded type:
//  * Floating-point/vector value whose same-sized integer type and memory type
//    are both legal: one integer load followed by a BITCAST (and an extend if
//    the result type differs), or scalarization when the memory type is a
//    vector and the integer load is neither legal nor custom.
//  * Any other floating-point/vector value: copy the bytes register by
//    register into a stack temporary, then redo the original load from it.
//  * Scalar integer: two half-width extending loads combined with SHL/OR.
11316std::pair<SDValue, SDValue>
11318 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
11319 "unaligned indexed loads not implemented!");
11320 SDValue Chain = LD->getChain();
11321 SDValue Ptr = LD->getBasePtr();
11322 EVT VT = LD->getValueType(0);
11323 EVT LoadedVT = LD->getMemoryVT();
11324 SDLoc dl(LD);
11325 auto &MF = DAG.getMachineFunction();
11326
11327 if (VT.isFloatingPoint() || VT.isVector()) {
11328 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
11329 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
11330 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
11331 LoadedVT.isVector()) {
11332 // Scalarize the load and let the individual components be handled.
11333 return scalarizeVectorLoad(LD, DAG);
11334 }
11335
11336 // Expand to a (misaligned) integer load of the same size,
11337 // then bitconvert to floating point or vector.
11338 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
11339 LD->getMemOperand());
11340 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
// The memory type may be narrower than the result type; widen with
// FP_EXTEND for floating-point results and ANY_EXTEND otherwise.
11341 if (LoadedVT != VT)
11342 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
11343 ISD::ANY_EXTEND, dl, VT, Result);
11344
11345 return std::make_pair(Result, newLoad.getValue(1));
11346 }
11347
11348 // Copy the value to a (aligned) stack slot using (unaligned) integer
11349 // loads and stores, then do a (aligned) load from the stack slot.
11350 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
11351 unsigned LoadedBytes = LoadedVT.getStoreSize();
11352 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Number of register-sized pieces, rounded up.
11353 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
11354
11355 // Make sure the stack slot is also aligned for the register type.
11356 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
11357 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
// NOTE(review): `Stores` is appended to below, but its declaration is not
// visible in this listing (a line appears to be missing here) - confirm
// against the full source.
11359 SDValue StackPtr = StackBase;
11360 unsigned Offset = 0;
11361
11362 EVT PtrVT = Ptr.getValueType();
11363 EVT StackPtrVT = StackPtr.getValueType();
11364
11365 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11366 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
11367
11368 // Do all but one copies using the full register width.
11369 for (unsigned i = 1; i < NumRegs; i++) {
11370 // Load one integer register's worth from the original location.
11371 SDValue Load = DAG.getLoad(
11372 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
11373 LD->getBaseAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
11374 // Follow the load with a store to the stack slot. Remember the store.
11375 Stores.push_back(DAG.getStore(
11376 Load.getValue(1), dl, Load, StackPtr,
11377 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
11378 // Increment the pointers.
11379 Offset += RegBytes;
11380
11381 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11382 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11383 }
11384
11385 // The last copy may be partial. Do an extending load.
// MemVT covers exactly the bytes that remain after the full-width copies.
11386 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
11387 8 * (LoadedBytes - Offset));
11388 SDValue Load = DAG.getExtLoad(
11389 ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
11390 LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->getBaseAlign(),
11391 LD->getMemOperand()->getFlags(), LD->getAAInfo());
11392 // Follow the load with a store to the stack slot. Remember the store.
11393 // On big-endian machines this requires a truncating store to ensure
11394 // that the bits end up in the right place.
11395 Stores.push_back(DAG.getTruncStore(
11396 Load.getValue(1), dl, Load, StackPtr,
11397 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
11398
11399 // The order of the stores doesn't matter - say it with a TokenFactor.
11400 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11401
11402 // Finally, perform the original load only redirected to the stack slot.
11403 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
11404 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
11405 LoadedVT);
11406
11407 // Callers expect a MERGE_VALUES node.
// NOTE(review): the code returns a (value, chain) pair rather than building
// a MERGE_VALUES node, so the comment above may be stale - confirm.
11408 return std::make_pair(Load, TF);
11409 }
11410
11411 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
11412 "Unaligned load of unsupported type.");
11413
11414 // Compute the new VT that is half the size of the old one. This is an
11415 // integer MVT.
11416 unsigned NumBits = LoadedVT.getSizeInBits();
11417 EVT NewLoadedVT;
11418 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
// From here on NumBits is the width of one half, not of the whole value.
11419 NumBits >>= 1;
11420
11421 Align Alignment = LD->getBaseAlign();
11422 unsigned IncrementSize = NumBits / 8;
11423 ISD::LoadExtType HiExtType = LD->getExtensionType();
11424
11425 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
11426 if (HiExtType == ISD::NON_EXTLOAD)
11427 HiExtType = ISD::ZEXTLOAD;
11428
11429 // Load the value in two parts
// The low half is always zero-extended so that it can be OR'ed into the
// shifted high half; it sits at the lower address on little-endian targets
// and at the higher address on big-endian ones.
11430 SDValue Lo, Hi;
11431 if (DAG.getDataLayout().isLittleEndian()) {
11432 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11433 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11434 LD->getAAInfo());
11435
11436 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11437 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
11438 LD->getPointerInfo().getWithOffset(IncrementSize),
11439 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11440 LD->getAAInfo());
11441 } else {
11442 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
11443 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11444 LD->getAAInfo());
11445
11446 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11447 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
11448 LD->getPointerInfo().getWithOffset(IncrementSize),
11449 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
11450 LD->getAAInfo());
11451 }
11452
11453 // aggregate the two parts
// Result = (Hi << NumBits) | Lo.
11454 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
11455 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
11456 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
11457
// Both half loads hang off the incoming chain; merge their output chains.
11458 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
11459 Hi.getValue(1));
11460
11461 return std::make_pair(Result, TF);
11462}
11463
11465 SelectionDAG &DAG) const {
// Expands an unaligned store (only ISD::UNINDEXED addressing is handled, per
// the assert) and returns the chain of the replacement store(s).
//
// Three strategies are used, depending on the stored memory type:
//  * Floating-point/vector with a legal same-sized integer type: BITCAST the
//    value to that integer type and store it, or scalarize when the memory
//    type is a vector and the integer store is neither legal nor custom.
//  * Any other floating-point/vector: store to a stack temporary, then copy
//    the bytes register by register to the destination.
//  * Scalar integer: split into two half-width truncating stores.
11466 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
11467 "unaligned indexed stores not implemented!");
11468 SDValue Chain = ST->getChain();
11469 SDValue Ptr = ST->getBasePtr();
11470 SDValue Val = ST->getValue();
11471 EVT VT = Val.getValueType();
11472 Align Alignment = ST->getBaseAlign();
11473 auto &MF = DAG.getMachineFunction();
11474 EVT StoreMemVT = ST->getMemoryVT();
11475
11476 SDLoc dl(ST);
11477 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
11478 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11479 if (isTypeLegal(intVT)) {
11480 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
11481 StoreMemVT.isVector()) {
11482 // Scalarize the store and let the individual components be handled.
11483 SDValue Result = scalarizeVectorStore(ST, DAG);
11484 return Result;
11485 }
11486 // Expand to a bitconvert of the value to the integer type of the
11487 // same size, then a (misaligned) int store.
11488 // FIXME: Does not handle truncating floating point stores!
11489 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
11490 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
11491 Alignment, ST->getMemOperand()->getFlags());
11492 return Result;
11493 }
11494 // Do a (aligned) store to a stack slot, then copy from the stack slot
11495 // to the final destination using (unaligned) integer loads and stores.
11496 MVT RegVT = getRegisterType(
11497 *DAG.getContext(),
11498 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
11499 EVT PtrVT = Ptr.getValueType();
11500 unsigned StoredBytes = StoreMemVT.getStoreSize();
11501 unsigned RegBytes = RegVT.getSizeInBits() / 8;
// Number of register-sized pieces, rounded up.
11502 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
11503
11504 // Make sure the stack slot is also aligned for the register type.
11505 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
11506 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11507
11508 // Perform the original store, only redirected to the stack slot.
11509 SDValue Store = DAG.getTruncStore(
11510 Chain, dl, Val, StackPtr,
11511 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
11512
11513 EVT StackPtrVT = StackPtr.getValueType();
11514
11515 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
11516 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
// NOTE(review): `Stores` is appended to below, but its declaration is not
// visible in this listing (a line appears to be missing here) - confirm
// against the full source.
11518 unsigned Offset = 0;
11519
11520 // Do all but one copies using the full register width.
11521 for (unsigned i = 1; i < NumRegs; i++) {
11522 // Load one integer register's worth from the stack slot.
// Every reload is chained on `Store` so it observes the stack-slot write.
11523 SDValue Load = DAG.getLoad(
11524 RegVT, dl, Store, StackPtr,
11525 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
11526 // Store it to the final location. Remember the store.
11527 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
11528 ST->getPointerInfo().getWithOffset(Offset),
11529 ST->getBaseAlign(),
11530 ST->getMemOperand()->getFlags()));
11531 // Increment the pointers.
11532 Offset += RegBytes;
11533 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
11534 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
11535 }
11536
11537 // The last store may be partial. Do a truncating store. On big-endian
11538 // machines this requires an extending load from the stack slot to ensure
11539 // that the bits are in the right place.
// LoadMemVT covers exactly the bytes that remain after the full-width copies.
11540 EVT LoadMemVT =
11541 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
11542
11543 // Load from the stack slot.
11544 SDValue Load = DAG.getExtLoad(
11545 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
11546 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
11547
11548 Stores.push_back(DAG.getTruncStore(
11549 Load.getValue(1), dl, Load, Ptr,
11550 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
11551 ST->getBaseAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()));
11552 // The order of the stores doesn't matter - say it with a TokenFactor.
11553 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
11554 return Result;
11555 }
11556
11557 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
11558 "Unaligned store of unknown type.");
11559 // Get the half-size VT
11560 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
11561 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
11562 unsigned IncrementSize = NumBits / 8;
11563
11564 // Divide the stored value in two parts.
// Lo is the value itself (the truncating store drops its upper bits); Hi is
// the value shifted right by the half width.
11565 SDValue ShiftAmount =
11566 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
11567 SDValue Lo = Val;
11568 // If Val is a constant, replace the upper bits with 0. The SRL will constant
11569 // fold and not use the upper bits. A smaller constant may be easier to
11570 // materialize.
11571 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
11572 Lo = DAG.getNode(
11573 ISD::AND, dl, VT, Lo,
11574 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
11575 VT));
11576 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
11577
11578 // Store the two parts
// Lo goes to the lower address on little-endian targets and to the higher
// address on big-endian ones.
11579 SDValue Store1, Store2;
11580 Store1 = DAG.getTruncStore(Chain, dl,
11581 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
11582 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
11583 ST->getMemOperand()->getFlags());
11584
11585 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
11586 Store2 = DAG.getTruncStore(
11587 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
11588 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
11589 ST->getMemOperand()->getFlags(), ST->getAAInfo());
11590
// Both half stores hang off the incoming chain; merge their output chains.
11591 SDValue Result =
11592 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
11593 return Result;
11594}
11595
// Returns Addr advanced by the number of bytes one memory access of DataVT
// covers.
//
// When IsCompressedMemory is set, only the elements selected by Mask occupy
// memory, so the increment is (number of set mask lanes) * (element size in
// bytes). Otherwise the increment is the store size of DataVT.
//
// NOTE(review): the line naming the function and its leading parameters
// (`Addr` and `Mask` are used below) is not visible in this listing.
11596SDValue
11598 const SDLoc &DL, EVT DataVT,
11599 SelectionDAG &DAG,
11600 bool IsCompressedMemory) const {
// NOTE(review): `Increment` is assigned below, but its declaration is not
// visible in this listing - confirm against the full source.
11602 EVT AddrVT = Addr.getValueType();
11603 EVT MaskVT = Mask.getValueType();
11604 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
11605 "Incompatible types of Data and Mask");
11606 if (IsCompressedMemory) {
11607 // Incrementing the pointer according to number of '1's in the mask.
11608 if (DataVT.isScalableVector()) {
// Scalable mask: zero-extend each lane to i32 and sum the lanes.
11609 EVT MaskExtVT = MaskVT.changeElementType(*DAG.getContext(), MVT::i32);
11610 SDValue MaskExt = DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Mask);
11611 Increment = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, MaskExt);
11612 } else {
// Fixed-length mask: reinterpret it as an integer (widened to at least
// 32 bits) and count the set bits.
11613 EVT MaskIntVT =
11614 EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
11615 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
11616 if (MaskIntVT.getSizeInBits() < 32) {
11617 MaskInIntReg =
11618 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
11619 MaskIntVT = MVT::i32;
11620 }
11621 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
11622 }
11623 // Scale is an element size in bytes.
11624 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
11625 AddrVT);
// Bring the lane count to the address type before scaling it.
11626 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
11627 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
11628 } else
11629 Increment = DAG.getTypeSize(DL, AddrVT, DataVT.getStoreSize());
11630
11631 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
11632}
11633
11635 EVT VecVT, const SDLoc &dl,
11636 ElementCount SubEC) {
// Returns a version of Idx restricted so that a sub-vector of SubEC elements
// starting at that index lies within a vector of type VecVT.
//
// Cases handled below:
//  * Fixed-length sub-vector inside a scalable vector: a constant index that
//    already fits within the minimum element count is returned unchanged;
//    otherwise the index is limited with UMIN against a vscale-based bound
//    minus NumSubElts.
//  * Single element and a power-of-two element count: mask the index with
//    (NElts - 1).
//  * Everything else: UMIN against NElts - NumSubElts (0 when the sub-vector
//    is at least as large as the vector).
//
// NOTE(review): the line naming the function and its leading parameters
// (`DAG` and `Idx` are used below) is not visible in this listing.
11637 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
11638 "Cannot index a scalable vector within a fixed-width vector");
11639
11640 unsigned NElts = VecVT.getVectorMinNumElements();
11641 unsigned NumSubElts = SubEC.getKnownMinValue();
11642 EVT IdxVT = Idx.getValueType();
11643
11644 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
11645 // If this is a constant index and we know the value plus the number of the
11646 // elements in the subvector minus one is less than the minimum number of
11647 // elements then it's safe to return Idx.
11648 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
11649 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
11650 return Idx;
// presumably getVScale yields vscale * NElts, i.e. the runtime element
// count - confirm against SelectionDAG::getVScale.
11651 SDValue VS =
11652 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
// A saturating subtract is chosen when NumSubElts exceeds the minimum
// element count, where a plain SUB could wrap below zero.
11653 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
11654 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
11655 DAG.getConstant(NumSubElts, dl, IdxVT));
11656 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
11657 }
11658 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
// Keep only the low log2(NElts) bits of the index.
11659 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
11660 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
11661 DAG.getConstant(Imm, dl, IdxVT));
11662 }
11663 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
11664 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
11665 DAG.getConstant(MaxIndex, dl, IdxVT));
11666}
11667
// Thin forwarding wrapper: passes DAG, VecPtr, VecVT, Index and PtrArithFlags
// (plus one argument that is not visible here) on to another routine and
// returns its result.
//
// NOTE(review): the function name, the callee and the argument between VecVT
// and Index are on lines missing from this listing; presumably this computes
// a single-element pointer via the sub-vector pointer helper - confirm
// against the full source.
11668SDValue
11670 EVT VecVT, SDValue Index,
11671 const SDNodeFlags PtrArithFlags) const {
11673 DAG, VecPtr, VecVT,
11675 Index, PtrArithFlags);
11676}
11677
// Computes the address of the sub-vector of type SubVecVT that starts at
// element Index of the in-memory vector at VecPtr (of type VecVT).
//
// The index is first converted to the pointer's value type and clamped with
// clampDynamicVectorIndex, then scaled by the element size in bytes (and by
// vscale when SubVecVT is scalable) and added to VecPtr using PtrArithFlags.
//
// NOTE(review): the line naming the function and its leading parameters
// (`DAG` and `VecPtr` are used below) is not visible in this listing.
11678SDValue
11680 EVT VecVT, EVT SubVecVT, SDValue Index,
11681 const SDNodeFlags PtrArithFlags) const {
11682 SDLoc dl(Index);
11683 // Make sure the index type is big enough to compute in.
11684 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
11685
11686 EVT EltVT = VecVT.getVectorElementType();
11687
11688 // Calculate the element offset and add it to the pointer.
11689 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
// Element types whose width is not a whole number of bytes are rejected.
11690 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
11691 "Converting bits to bytes lost precision");
11692 assert(SubVecVT.getVectorElementType() == EltVT &&
11693 "Sub-vector must be a vector with matching element type");
11694 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
11695 SubVecVT.getVectorElementCount());
11696
11697 EVT IdxVT = Index.getValueType();
// For a scalable sub-vector the index is additionally multiplied by vscale.
11698 if (SubVecVT.isScalableVector())
11699 Index =
11700 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11701 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
11702
// Convert the element index into a byte offset.
11703 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
11704 DAG.getConstant(EltSize, dl, IdxVT));
11705 return DAG.getMemBasePlusOffset(VecPtr, Index, dl, PtrArithFlags);
11706}
11707
11708//===----------------------------------------------------------------------===//
11709// Implementation of Emulated TLS Model
11710//===----------------------------------------------------------------------===//
11711
11713 SelectionDAG &DAG) const {
// Lowers the address of a thread-local global under the emulated-TLS model:
// builds a C-calling-convention call to __emutls_get_address, passing the
// address of the companion global "__emutls_v.<name>", and returns the call's
// result value. The companion global must already exist in the module and the
// GlobalAddressSDNode offset must be zero (both are asserted).
//
// NOTE(review): the line naming the function and its first parameter (`GA` is
// used below) is not visible in this listing.
11714 // Access to address of TLS variable xyz is lowered to a function call:
11715 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
11716 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11717 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
11718 SDLoc dl(GA);
11719
11720 ArgListTy Args;
// NOTE(review): the initializer of `GV` is on a line missing from this
// listing; presumably it is derived from GA's global - confirm.
11721 const GlobalValue *GV =
11723 SmallString<32> NameString("__emutls_v.");
11724 NameString += GV->getName();
11725 StringRef EmuTlsVarName(NameString);
11726 const GlobalVariable *EmuTlsVar =
11727 GV->getParent()->getNamedGlobal(EmuTlsVarName);
11728 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
11729 Args.emplace_back(DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT), VoidPtrType);
11730
11731 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
11732
// NOTE(review): the declaration of `CLI` is on a line missing from this
// listing. The call is chained on the DAG entry node.
11734 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
11735 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
11736 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
11737
11738 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
11739 // At least for X86 targets, maybe good for other targets too?
// NOTE(review): the declaration of `MFI` is on a line missing from this
// listing.
11741 MFI.setAdjustsStack(true); // Is this only for X86 target?
11742 MFI.setHasCalls(true);
11743
11744 assert((GA->getOffset() == 0) &&
11745 "Emulated TLS must have zero offset in GlobalAddressSDNode");
11746 return CallResult.first;
11747}
11748
11750 SelectionDAG &DAG) const {
// Rewrites (setcc X, 0, seteq) as (srl (ctlz X), log2(bitwidth)), truncated
// to i32. CTLZ of zero yields the bit width, which is the only result with
// the log2(bitwidth) bit set, so the shift produces 1 exactly when X == 0.
//
// Operands narrower than 32 bits are zero-extended to i32 first. An empty
// SDValue is returned when isCtlzFast() is false or the pattern does not
// match.
//
// NOTE(review): the line naming the function and its first parameter (`Op` is
// used below) is not visible in this listing.
11751 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
11752 if (!isCtlzFast())
11753 return SDValue();
11754 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11755 SDLoc dl(Op);
11756 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
11757 EVT VT = Op.getOperand(0).getValueType();
11758 SDValue Zext = Op.getOperand(0);
11759 if (VT.bitsLT(MVT::i32)) {
11760 VT = MVT::i32;
11761 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
11762 }
11763 unsigned Log2b = Log2_32(VT.getSizeInBits());
11764 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
11765 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
11766 DAG.getConstant(Log2b, dl, MVT::i32));
11767 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
11768 }
11769 return SDValue();
11770}
11771
// Expands an integer SMIN/SMAX/UMIN/UMAX node (`Node`) into other operations.
// Strategies are tried in this order:
//  1. Use the opposite-signedness min/max if it is legal and both operands
//     have a zero sign bit.
//  2. umax(x, 1) -> x - (x == 0), when the compare result has the value type.
//  3. umin(x, y) -> x - usubsat(x, y).
//  4. umax(x, y) -> x + usubsat(y, x).
//  5. Unroll the vector operation.
//  6. Fall back to setcc + select, reusing an existing SETCC node if any.
//
// NOTE(review): the function signature and the trailing parts of several `if`
// conditions are on lines missing from this listing, so some conditions below
// appear truncated and some braces appear unbalanced.
11773 SDValue Op0 = Node->getOperand(0);
11774 SDValue Op1 = Node->getOperand(1);
11775 EVT VT = Op0.getValueType();
11776 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11777 unsigned Opcode = Node->getOpcode();
11778 SDLoc DL(Node);
11779
11780 // If both sign bits are zero, flip UMIN/UMAX <-> SMIN/SMAX if legal.
11781 unsigned AltOpcode = ISD::getOppositeSignednessMinMaxOpcode(Opcode);
11782 if (isOperationLegal(AltOpcode, VT) && DAG.SignBitIsZero(Op0) &&
11783 DAG.SignBitIsZero(Op1))
11784 return DAG.getNode(AltOpcode, DL, VT, Op0, Op1);
11785
11786 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
11787 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
// Op0 is used twice below, so it is frozen first.
11789 Op0 = DAG.getFreeze(Op0);
11790 SDValue Zero = DAG.getConstant(0, DL, VT);
11791 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11792 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
11793 }
11794
11795 // umin(x,y) -> sub(x,usubsat(x,y))
11796 // TODO: Missing freeze(Op0)?
11797 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
11799 return DAG.getNode(ISD::SUB, DL, VT, Op0,
11800 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
11801 }
11802
11803 // umax(x,y) -> add(x,usubsat(y,x))
11804 // TODO: Missing freeze(Op0)?
11805 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
11807 return DAG.getNode(ISD::ADD, DL, VT, Op0,
11808 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
11809 }
11810
11811 // FIXME: Should really try to split the vector in case it's legal on a
11812 // subvector.
11814 return DAG.UnrollVectorOp(Node);
11815
11816 // Attempt to find an existing SETCC node that we can reuse.
11817 // TODO: Do we need a generic doesSETCCNodeExist?
11818 // TODO: Missing freeze(Op0)/freeze(Op1)?
// buildMinMax emits select(setcc(Op0, Op1, CC), ...). It first looks for an
// existing SETCC using PrefCC or AltCC (select Op0, Op1), then one using
// the commuted codes (select Op1, Op0), and otherwise creates a new SETCC
// with PrefCC.
11819 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
11820 ISD::CondCode PrefCommuteCC,
11821 ISD::CondCode AltCommuteCC) {
11822 SDVTList BoolVTList = DAG.getVTList(BoolVT);
11823 for (ISD::CondCode CC : {PrefCC, AltCC}) {
11824 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11825 {Op0, Op1, DAG.getCondCode(CC)})) {
11826 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11827 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11828 }
11829 }
11830 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
11831 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
11832 {Op0, Op1, DAG.getCondCode(CC)})) {
11833 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
11834 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
11835 }
11836 }
11837 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
11838 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
11839 };
11840
11841 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
11842 // -> Y = (A < B) ? B : A
11843 // -> Y = (A >= B) ? A : B
11844 // -> Y = (A <= B) ? B : A
11845 switch (Opcode) {
11846 case ISD::SMAX:
11847 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
11848 case ISD::SMIN:
11849 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
11850 case ISD::UMAX:
11851 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
11852 case ISD::UMIN:
11853 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
11854 }
11855
11856 llvm_unreachable("How did we get here?");
11857}
11858
// Expands a saturating add/sub node (SADDSAT, UADDSAT, SSUBSAT or USUBSAT)
// on integer operands of identical type. Strategies are tried in this order:
//  1. usub.sat(a, b) -> umax(a, b) - b, when UMAX is legal.
//  2. usub.sat(a, 1) -> a - zext(a != 0).
//  3. uadd.sat(a, b) -> umin(a, ~b) + b, when UMIN is legal.
//  4. Unroll the vector operation.
//  5. Emit the matching overflow-reporting node (SADDO/UADDO/SSUBO/USUBO)
//     and select or mask in the saturation value when overflow is reported.
//
// NOTE(review): the function signature and several `if` header lines are
// missing from this listing, so some conditions below appear truncated and
// some braces appear unbalanced.
11860 unsigned Opcode = Node->getOpcode();
11861 SDValue LHS = Node->getOperand(0);
11862 SDValue RHS = Node->getOperand(1);
11863 EVT VT = LHS.getValueType();
11864 SDLoc dl(Node);
11865
11866 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
11867 assert(VT.isInteger() && "Expected operands to be integers");
11868
11869 // usub.sat(a, b) -> umax(a, b) - b
11870 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
11871 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
11872 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
11873 }
11874
11875 // usub.sat(a, 1) -> sub(a, zext(a != 0))
11876 // Prefer this on targets without legal/cost-effective overflow-carry nodes.
11877 if (Opcode == ISD::USUBSAT && isOneOrOneSplat(RHS) &&
// LHS is used twice below, so it is frozen first.
11879 LHS = DAG.getFreeze(LHS);
11880 SDValue Zero = DAG.getConstant(0, dl, VT);
11881 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11882 SDValue IsNonZero = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETNE);
11883 SDValue Subtrahend = DAG.getBoolExtOrTrunc(IsNonZero, dl, VT, BoolVT);
// Mask with 1 so the subtrahend is exactly 0 or 1 whatever the extension
// produced in the upper bits.
11884 Subtrahend =
11885 DAG.getNode(ISD::AND, dl, VT, Subtrahend, DAG.getConstant(1, dl, VT));
11886 return DAG.getNode(ISD::SUB, dl, VT, LHS, Subtrahend);
11887 }
11888
11889 // uadd.sat(a, b) -> umin(a, ~b) + b
11890 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
11891 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
11892 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
11893 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
11894 }
11895
// Map the saturating opcode to the overflow-reporting opcode of the same
// signedness and operation.
11896 unsigned OverflowOp;
11897 switch (Opcode) {
11898 case ISD::SADDSAT:
11899 OverflowOp = ISD::SADDO;
11900 break;
11901 case ISD::UADDSAT:
11902 OverflowOp = ISD::UADDO;
11903 break;
11904 case ISD::SSUBSAT:
11905 OverflowOp = ISD::SSUBO;
11906 break;
11907 case ISD::USUBSAT:
11908 OverflowOp = ISD::USUBO;
11909 break;
11910 default:
11911 llvm_unreachable("Expected method to receive signed or unsigned saturation "
11912 "addition or subtraction node.");
11913 }
11914
11915 // FIXME: Should really try to split the vector in case it's legal on a
11916 // subvector.
11918 return DAG.UnrollVectorOp(Node);
11919
11920 unsigned BitWidth = LHS.getScalarValueSizeInBits();
11921 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// Result value 0 is the wrapped sum/difference; value 1 is the overflow flag.
11922 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
11923 SDValue SumDiff = Result.getValue(0);
11924 SDValue Overflow = Result.getValue(1);
11925 SDValue Zero = DAG.getConstant(0, dl, VT);
11926 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
11927
11928 if (Opcode == ISD::UADDSAT) {
// NOTE(review): the inner `if` guarding the mask-based form is on a line
// missing from this listing.
11930 // (LHS + RHS) | OverflowMask
11931 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11932 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
11933 }
11934 // Overflow ? 0xffff.... : (LHS + RHS)
11935 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
11936 }
11937
11938 if (Opcode == ISD::USUBSAT) {
// NOTE(review): the inner `if` guarding the mask-based form is on a line
// missing from this listing.
11940 // (LHS - RHS) & ~OverflowMask
11941 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
11942 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
11943 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
11944 }
11945 // Overflow ? 0 : (LHS - RHS)
11946 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
11947 }
11948
11949 assert((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
11950 "Expected signed saturating add/sub opcode");
11951
11952 const APInt MinVal = APInt::getSignedMinValue(BitWidth);
11953 const APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
11954
11955 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
11956 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
11957
11958 // If either of the operand signs are known, then they are guaranteed to
11959 // only saturate in one direction. If non-negative they will saturate
11960 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
11961 //
11962 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
11963 // sign of 'y' has to be flipped.
11964
11965 bool LHSIsNonNegative = KnownLHS.isNonNegative();
11966 bool RHSIsNonNegative =
11967 Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative() : KnownRHS.isNegative();
11968 if (LHSIsNonNegative || RHSIsNonNegative) {
11969 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
11970 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
11971 }
11972
11973 bool LHSIsNegative = KnownLHS.isNegative();
11974 bool RHSIsNegative =
11975 Opcode == ISD::SADDSAT ? KnownRHS.isNegative() : KnownRHS.isNonNegative();
11976 if (LHSIsNegative || RHSIsNegative) {
11977 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11978 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
11979 }
11980
11981 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
// The arithmetic shift by BitWidth - 1 smears the sign of the wrapped
// result across the word; XOR with MinVal then turns an all-ones word into
// the signed maximum and an all-zeros word into the signed minimum.
11982 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
11983 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
11984 DAG.getConstant(BitWidth - 1, dl, VT));
11985 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
11986 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
11987}
11988
// Expands a three-way comparison node (unsigned predicates are used for
// ISD::UCMP, signed ones otherwise). The result type is the node's value
// type, which may differ from the operand type.
//
// Two forms are produced:
//  * select(IsLT, -1, select(IsGT, 1, 0)), or
//  * sign-extend-or-truncate of a boolean subtraction of IsGT and IsLT
//    (the two are swapped first under a condition that is not visible here).
//
// NOTE(review): the function signature and parts of both `if` conditions are
// on lines missing from this listing, so the conditions below appear
// truncated.
11990 unsigned Opcode = Node->getOpcode();
11991 SDValue LHS = Node->getOperand(0);
11992 SDValue RHS = Node->getOperand(1);
11993 EVT VT = LHS.getValueType();
11994 EVT ResVT = Node->getValueType(0);
11995 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11996 SDLoc dl(Node);
11997
11998 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
11999 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
12000 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
12001 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
12002
12003 // We can't perform arithmetic on i1 values. Extending them would
12004 // probably result in worse codegen, so let's just use two selects instead.
12005 // Some targets are also just better off using selects rather than subtraction
12006 // because one of the conditions can be merged with one of the selects.
12007 // And finally, if we don't know the contents of high bits of a boolean value
12008 // we can't perform any arithmetic either.
12010 BoolVT.getScalarSizeInBits() == 1 ||
12012 SDValue SelectZeroOrOne =
12013 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
12014 DAG.getConstant(0, dl, ResVT));
12015 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
12016 SelectZeroOrOne);
12017 }
12018
// NOTE(review): the condition guarding this swap is on a line missing from
// this listing; presumably it depends on how the target represents true
// booleans - confirm against the full source.
12020 std::swap(IsGT, IsLT);
12021 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
12022 ResVT);
12023}
12024
// Expands a saturating left shift (ISD::SSHLSAT or ISD::USHLSAT) on integer
// operands.
//
// The plain shift LHS << RHS is computed and then shifted back (SRA for the
// signed form, SRL for the unsigned form). If that does not reproduce LHS,
// bits were lost and the saturation value is selected instead:
//  * unsigned: the maximum unsigned value;
//  * signed: the signed minimum when LHS < 0, otherwise the signed maximum.
//
// NOTE(review): the function signature and the condition guarding the
// UnrollVectorOp return are on lines missing from this listing.
12026 unsigned Opcode = Node->getOpcode();
12027 bool IsSigned = Opcode == ISD::SSHLSAT;
12028 SDValue LHS = Node->getOperand(0);
12029 SDValue RHS = Node->getOperand(1);
12030 EVT VT = LHS.getValueType();
12031 SDLoc dl(Node);
12032
12033 assert((Node->getOpcode() == ISD::SSHLSAT ||
12034 Node->getOpcode() == ISD::USHLSAT) &&
12035 "Expected a SHLSAT opcode");
12036 assert(VT.isInteger() && "Expected operands to be integers");
12037
12039 return DAG.UnrollVectorOp(Node);
12040
12041 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
12042
12043 unsigned BW = VT.getScalarSizeInBits();
12044 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12045 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
// Orig is the shifted value shifted back; it equals LHS iff no bits were
// lost.
12046 SDValue Orig =
12047 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
12048
12049 SDValue SatVal;
12050 if (IsSigned) {
// The saturation direction follows the sign of LHS.
12051 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
12052 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
12053 SDValue Cond =
12054 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
12055 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
12056 } else {
12057 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
12058 }
12059 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
12060 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
12061}
12062
12064 bool Signed, SDValue &Lo, SDValue &Hi,
12065 SDValue LHS, SDValue RHS,
12066 SDValue HiLHS, SDValue HiRHS) const {
// Computes the double-width product of LHS and RHS using only operations of
// type VT, writing the low half to Lo and the high half to Hi.
//
// Each operand is split into half-width pieces and the four partial products
// are accumulated with explicit carry propagation. When Signed is set, the
// upper halves are extracted with arithmetic shifts. HiLHS/HiRHS must be
// either both set or both null, and Signed may only be set when they are
// null (both asserted); when set, the cross products HiRHS * LHS and
// RHS * HiLHS are added into Hi.
//
// NOTE(review): the line naming the function and its leading parameters
// (`DAG` and `dl` are used below) is not visible in this listing.
12067 EVT VT = LHS.getValueType();
12068 assert(RHS.getValueType() == VT && "Mismatching operand types");
12069
12070 assert((HiLHS && HiRHS) || (!HiLHS && !HiRHS));
12071 assert((!Signed || !HiLHS) &&
12072 "Signed flag should only be set when HiLHS and RiRHS are null");
12073
12074 // We'll expand the multiplication by brute force because we have no other
12075 // options. This is a trivially-generalized version of the code from
12076 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
12077 // 4.3.1). If Signed is set, we can use arithmetic right shifts to propagate
12078 // sign bits while calculating the Hi half.
12079 unsigned Bits = VT.getSizeInBits();
12080 unsigned HalfBits = Bits / 2;
// LL/RL: low halves of LHS/RHS.
12081 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
12082 SDValue LL = DAG.getNode(ISD::AND, dl, VT, LHS, Mask);
12083 SDValue RL = DAG.getNode(ISD::AND, dl, VT, RHS, Mask);
12084
// T = LL * RL, split into its low (TL) and high (TH) halves.
12085 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LL, RL);
12086 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
12087
12088 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
12089 // This is always an unsigned shift.
12090 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
12091
// LH/RH: high halves of LHS/RHS (sign-propagating when Signed).
12092 unsigned ShiftOpc = Signed ? ISD::SRA : ISD::SRL;
12093 SDValue LH = DAG.getNode(ShiftOpc, dl, VT, LHS, Shift);
12094 SDValue RH = DAG.getNode(ShiftOpc, dl, VT, RHS, Shift);
12095
// U = LH * RL + TH, split into UL and UH.
12096 SDValue U =
12097 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RL), TH);
12098 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
12099 SDValue UH = DAG.getNode(ShiftOpc, dl, VT, U, Shift);
12100
// V = LL * RH + UL; VH is its high half.
12101 SDValue V =
12102 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LL, RH), UL);
12103 SDValue VH = DAG.getNode(ShiftOpc, dl, VT, V, Shift);
12104
// Lo = TL + (V << HalfBits); Hi = LH * RH + UH + VH.
12105 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
12106 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
12107
12108 Hi = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LH, RH),
12109 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
12110
12111 // If HiLHS and HiRHS are set, multiply them by the opposite low part and add
12112 // the products to Hi.
12113 if (HiLHS) {
12114 SDValue RHLL = DAG.getNode(ISD::MUL, dl, VT, HiRHS, LHS);
12115 SDValue RLLH = DAG.getNode(ISD::MUL, dl, VT, RHS, HiLHS);
12116 Hi = DAG.getNode(ISD::ADD, dl, VT, Hi,
12117 DAG.getNode(ISD::ADD, dl, VT, RHLL, RLLH));
12118 }
12119}
12120
12122 bool Signed, const SDValue LHS,
12123 const SDValue RHS, SDValue &Lo,
12124 SDValue &Hi) const {
// Computes the double-width product of LHS and RHS and returns its low and
// high halves in Lo and Hi. A multiply libcall matching the widened integer
// type (MUL_I16 through MUL_I128) is used when an implementation exists;
// otherwise the product is expanded inline via forceExpandMultiply. Signed
// selects sign fill versus zero fill for the upper halves of the operands.
12125 EVT VT = LHS.getValueType();
12126 assert(RHS.getValueType() == VT && "Mismatching operand types");
12127 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12128 // We can fall back to a libcall with an illegal type for the MUL if we
12129 // have a libcall big enough.
12130 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
12131 if (WideVT == MVT::i16)
12132 LC = RTLIB::MUL_I16;
12133 else if (WideVT == MVT::i32)
12134 LC = RTLIB::MUL_I32;
12135 else if (WideVT == MVT::i64)
12136 LC = RTLIB::MUL_I64;
12137 else if (WideVT == MVT::i128)
12138 LC = RTLIB::MUL_I128;
12139
// If no implementation is available for the selected libcall, expand the
// multiplication inline instead of calling out.
12140 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
12141 if (LibcallImpl == RTLIB::Unsupported) {
12142 forceExpandMultiply(DAG, dl, Signed, Lo, Hi, LHS, RHS);
12143 return;
12144 }
12145
// Build the upper half of each widened operand: a sign fill for signed
// multiplication, zero otherwise.
12146 SDValue HiLHS, HiRHS;
12147 if (Signed) {
12148 // The high part is obtained by SRA'ing all but one of the bits of low
12149 // part.
12150 unsigned LoSize = VT.getFixedSizeInBits();
12151 SDValue Shift = DAG.getShiftAmountConstant(LoSize - 1, VT, dl);
12152 HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, Shift);
12153 HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, Shift);
12154 } else {
12155 HiLHS = DAG.getConstant(0, dl, VT);
12156 HiRHS = DAG.getConstant(0, dl, VT);
12157 }
12158
12159 // Attempt a libcall.
12160 SDValue Ret;
12162 CallOptions.setIsSigned(Signed);
12163 CallOptions.setIsPostTypeLegalization(true);
12165 // Halves of WideVT are packed into registers in different order
12166 // depending on platform endianness. This is usually handled by
12167 // the C calling convention, but we can't defer to it in
12168 // the legalizer.
12169 SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
12170 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12171 } else {
12172 SDValue Args[] = {HiLHS, LHS, HiRHS, RHS};
12173 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
12174 }
12176 "Ret value is a collection of constituent nodes holding result.");
// Pick the low and high halves out of the returned node; which operand holds
// which half depends on the data layout endianness.
12177 if (DAG.getDataLayout().isLittleEndian()) {
12178 // Same as above.
12179 Lo = Ret.getOperand(0);
12180 Hi = Ret.getOperand(1);
12181 } else {
12182 Lo = Ret.getOperand(1);
12183 Hi = Ret.getOperand(0);
12184 }
12185}
12186
// Expands a fixed-point multiplication node (SMULFIX, UMULFIX, SMULFIXSAT or
// UMULFIXSAT). Operands 0 and 1 are the factors and operand 2 is the constant
// scale. The double-width product is formed as Lo/Hi halves and shifted right
// by the scale; the saturating opcodes additionally clamp the result when the
// product does not fit. Returns an empty SDValue for vector types when none of
// the multiply forms checked below is legal or custom.
12187SDValue
12189 assert((Node->getOpcode() == ISD::SMULFIX ||
12190 Node->getOpcode() == ISD::UMULFIX ||
12191 Node->getOpcode() == ISD::SMULFIXSAT ||
12192 Node->getOpcode() == ISD::UMULFIXSAT) &&
12193 "Expected a fixed point multiplication opcode");
12194
12195 SDLoc dl(Node);
12196 SDValue LHS = Node->getOperand(0);
12197 SDValue RHS = Node->getOperand(1);
12198 EVT VT = LHS.getValueType();
12199 unsigned Scale = Node->getConstantOperandVal(2);
12200 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
12201 Node->getOpcode() == ISD::UMULFIXSAT);
12202 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
12203 Node->getOpcode() == ISD::SMULFIXSAT);
12204 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12205 unsigned VTSize = VT.getScalarSizeInBits();
12206
// With a zero scale this is a plain integer multiply; the saturating forms
// use the overflow-reporting multiplies when those are legal or custom.
12207 if (!Scale) {
12208 // [us]mul.fix(a, b, 0) -> mul(a, b)
12209 if (!Saturating) {
12211 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12212 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
12213 SDValue Result =
12214 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12215 SDValue Product = Result.getValue(0);
12216 SDValue Overflow = Result.getValue(1);
12217 SDValue Zero = DAG.getConstant(0, dl, VT);
12218
12219 APInt MinVal = APInt::getSignedMinValue(VTSize);
12220 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
12221 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
12222 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12223 // Xor the inputs, if resulting sign bit is 0 the product will be
12224 // positive, else negative.
12225 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12226 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
12227 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
12228 return DAG.getSelect(dl, VT, Overflow, Result, Product);
12229 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
12230 SDValue Result =
12231 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
12232 SDValue Product = Result.getValue(0);
12233 SDValue Overflow = Result.getValue(1);
12234
12235 APInt MaxVal = APInt::getMaxValue(VTSize);
12236 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
12237 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
12238 }
12239 }
12240
12241 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
12242 "Expected scale to be less than the number of bits if signed or at "
12243 "most the number of bits if unsigned.");
12244 assert(LHS.getValueType() == RHS.getValueType() &&
12245 "Expected both operands to be the same type");
12246
12247 // Get the upper and lower bits of the result.
// Preference order: a combined lo/hi multiply, then MUL plus a high-half
// multiply, then a multiply in the widened type, and finally (scalars only)
// the forced wide-multiply expansion.
12248 SDValue Lo, Hi;
12249 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
12250 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
12251 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12252 if (isOperationLegalOrCustom(LoHiOp, VT)) {
12253 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
12254 Lo = Result.getValue(0);
12255 Hi = Result.getValue(1);
12256 } else if (isOperationLegalOrCustom(HiOp, VT)) {
12257 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12258 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
12259 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
12260 // Try for a multiplication using a wider type.
12261 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12262 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
12263 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
12264 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
12265 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
12266 SDValue Shifted =
12267 DAG.getNode(ISD::SRA, dl, WideVT, Res,
12268 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
12269 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
12270 } else if (VT.isVector()) {
12271 return SDValue();
12272 } else {
12273 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
12274 }
12275
12276 if (Scale == VTSize)
12277 // Result is just the top half since we'd be shifting by the width of the
12278 // operand. Overflow impossible so this works for both UMULFIX and
12279 // UMULFIXSAT.
12280 return Hi;
12281
12282 // The result will need to be shifted right by the scale since both operands
12283 // are scaled. The result is given to us in 2 halves, so we only want part of
12284 // both in the result.
12285 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
12286 DAG.getShiftAmountConstant(Scale, VT, dl));
12287 if (!Saturating)
12288 return Result;
12289
12290 if (!Signed) {
12291 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
12292 // widened multiplication) aren't all zeroes.
12293
12294 // Saturate to max if ((Hi >> Scale) != 0),
12295 // which is the same as if (Hi > ((1 << Scale) - 1))
12296 APInt MaxVal = APInt::getMaxValue(VTSize);
12297 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
12298 dl, VT);
12299 Result = DAG.getSelectCC(dl, Hi, LowMask,
12300 DAG.getConstant(MaxVal, dl, VT), Result,
12301 ISD::SETUGT);
12302
12303 return Result;
12304 }
12305
12306 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
12307 // widened multiplication) aren't all ones or all zeroes.
12308
12309 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
12310 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
12311
// Signed saturating multiply with zero scale that was not handled by the
// SMULO path above: overflow is detected by comparing Hi with the sign
// extension of Lo.
12312 if (Scale == 0) {
12313 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
12314 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
12315 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
12316 // Saturated to SatMin if wide product is negative, and SatMax if wide
12317 // product is positive ...
12318 SDValue Zero = DAG.getConstant(0, dl, VT);
12319 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
12320 ISD::SETLT);
12321 // ... but only if we overflowed.
12322 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
12323 }
12324
12325 // We handled Scale==0 above so all the bits to examine are in Hi.
12326
12327 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
12328 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
12329 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
12330 dl, VT);
12331 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
12332 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
12333 // which is the same as if (Hi < (-1 << (Scale - 1)))
12334 SDValue HighMask =
12335 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
12336 dl, VT);
12337 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
12338 return Result;
12339}
12340
// Tries to expand a fixed-point division (SDIVFIX, UDIVFIX, SDIVFIXSAT or
// UDIVFIXSAT) directly in the operand type. LHS is shifted left and/or RHS is
// shifted right so that the shifts total Scale bits, then an ordinary division
// is emitted. Returns an empty SDValue when the operands do not provide enough
// headroom for those shifts. Signed quotients are adjusted to round towards
// negative infinity.
12341SDValue
12343 SDValue LHS, SDValue RHS,
12344 unsigned Scale, SelectionDAG &DAG) const {
12345 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
12346 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
12347 "Expected a fixed point division opcode");
12348
12349 EVT VT = LHS.getValueType();
12350 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
12351 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
12352 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12353
12354 // If there is enough room in the type to upscale the LHS or downscale the
12355 // RHS before the division, we can perform it in this type without having to
12356 // resize. For signed operations, the LHS headroom is the number of
12357 // redundant sign bits, and for unsigned ones it is the number of zeroes.
12358 // The headroom for the RHS is the number of trailing zeroes.
12359 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
12361 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12362
12363 // For signed saturating operations, we need to be able to detect true integer
12364 // division overflow; that is, when you have MIN / -EPS. However, this
12365 // is undefined behavior and if we emit divisions that could take such
12366 // values it may cause undesired behavior (arithmetic exceptions on x86, for
12367 // example).
12368 // Avoid this by requiring an extra bit so that we never get this case.
12369 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
12370 // signed saturating division, we need to emit a whopping 32-bit division.
12371 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
12372 return SDValue();
12373
// Use as much of the LHS headroom as possible; the RHS absorbs the rest.
12374 unsigned LHSShift = std::min(LHSLead, Scale);
12375 unsigned RHSShift = Scale - LHSShift;
12376
12377 // At this point, we know that if we shift the LHS up by LHSShift and the
12378 // RHS down by RHSShift, we can emit a regular division with a final scaling
12379 // factor of Scale.
12380
12381 if (LHSShift)
12382 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
12383 DAG.getShiftAmountConstant(LHSShift, VT, dl));
12384 if (RHSShift)
12385 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
12386 DAG.getShiftAmountConstant(RHSShift, VT, dl));
12387
12388 SDValue Quot;
12389 if (Signed) {
12390 // For signed operations, if the resulting quotient is negative and the
12391 // remainder is nonzero, subtract 1 from the quotient to round towards
12392 // negative infinity.
12393 SDValue Rem;
12394 // FIXME: Ideally we would always produce an SDIVREM here, but if the
12395 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
12396 // we couldn't just form a libcall, but the type legalizer doesn't do it.
12397 if (isTypeLegal(VT) &&
12399 Quot = DAG.getNode(ISD::SDIVREM, dl,
12400 DAG.getVTList(VT, VT),
12401 LHS, RHS);
12402 Rem = Quot.getValue(1);
12403 Quot = Quot.getValue(0);
12404 } else {
12405 Quot = DAG.getNode(ISD::SDIV, dl, VT,
12406 LHS, RHS);
12407 Rem = DAG.getNode(ISD::SREM, dl, VT,
12408 LHS, RHS);
12409 }
// The quotient is negative exactly when the operand signs differ.
12410 SDValue Zero = DAG.getConstant(0, dl, VT);
12411 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
12412 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
12413 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
12414 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
12415 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
12416 DAG.getConstant(1, dl, VT));
12417 Quot = DAG.getSelect(dl, VT,
12418 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
12419 Sub1, Quot);
12420 } else
12421 Quot = DAG.getNode(ISD::UDIV, dl, VT,
12422 LHS, RHS);
12423
12424 return Quot;
12425}
12426
12428 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
// Expands an unsigned add/sub-with-overflow node. UADDO selects addition; any
// other opcode is handled as subtraction. Result receives the wrapped
// sum/difference of operands 0 and 1, and Overflow receives the carry/borrow
// flag in the type of the second result of the node.
12429 SDLoc dl(Node);
12430 SDValue LHS = Node->getOperand(0);
12431 SDValue RHS = Node->getOperand(1);
12432 bool IsAdd = Node->getOpcode() == ISD::UADDO;
12433
12434 // If UADDO_CARRY/USUBO_CARRY is legal or custom, use that instead.
12435 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
12436 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
12437 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
12438 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
12439 { LHS, RHS, CarryIn });
12440 Result = SDValue(NodeCarry.getNode(), 0);
12441 Overflow = SDValue(NodeCarry.getNode(), 1);
12442 return;
12443 }
12444
12445 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12446 LHS.getValueType(), LHS, RHS);
12447
12448 EVT ResultType = Node->getValueType(1);
12449 EVT SetCCType = getSetCCResultType(
12450 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12451 SDValue SetCC;
12452 if (IsAdd && isOneConstant(RHS)) {
12453 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
12454 // the live range of X. We assume comparing with 0 is cheap.
12455 // The general case (X + C) < C is not necessarily beneficial. Although we
12456 // reduce the live range of X, we may introduce the materialization of
12457 // constant C.
12458 SetCC =
12459 DAG.getSetCC(dl, SetCCType, Result,
12460 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
12461 } else if (IsAdd && isAllOnesConstant(RHS)) {
12462 // Special case: uaddo X, -1 overflows if X != 0.
12463 SetCC =
12464 DAG.getSetCC(dl, SetCCType, LHS,
12465 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
12466 } else {
// General case: an addition overflowed if the result is below LHS, and a
// subtraction overflowed if the result is above LHS (unsigned compares).
12467 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
12468 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
12469 }
12470 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12471}
12472
12474 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
// Expands a signed add/sub-with-overflow node. SADDO selects addition; any
// other opcode is handled as subtraction. Result receives the wrapped
// ADD/SUB of operands 0 and 1, and Overflow receives the overflow flag in the
// type of the second result of the node.
12475 SDLoc dl(Node);
12476 SDValue LHS = Node->getOperand(0);
12477 SDValue RHS = Node->getOperand(1);
12478 bool IsAdd = Node->getOpcode() == ISD::SADDO;
12479
12480 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
12481 LHS.getValueType(), LHS, RHS);
12482
12483 EVT ResultType = Node->getValueType(1);
12484 EVT OType = getSetCCResultType(
12485 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
12486
12487 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
// The wrapped and the saturated results differ exactly when overflow occurred.
12488 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
12489 if (isOperationLegal(OpcSat, LHS.getValueType())) {
12490 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
12491 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
12492 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
12493 return;
12494 }
12495
12496 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
12497
12498 if (IsAdd) {
12499 // For an addition, the result should be less than one of the operands (LHS)
12500 // if and only if the other operand (RHS) is negative, otherwise there will
12501 // be overflow.
12502 SDValue ResultLowerThanLHS =
12503 DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
12504 SDValue RHSNegative = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETLT);
12505 Overflow = DAG.getBoolExtOrTrunc(
12506 DAG.getNode(ISD::XOR, dl, OType, RHSNegative, ResultLowerThanLHS), dl,
12507 ResultType, ResultType);
12508 } else {
12509 // For subtraction, overflow occurs when the signed comparison of operands
12510 // doesn't match the sign of the result.
12511 SDValue LHSLessThanRHS = DAG.getSetCC(dl, OType, LHS, RHS, ISD::SETLT);
12512 SDValue ResultNegative = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETLT);
12513 Overflow = DAG.getBoolExtOrTrunc(
12514 DAG.getNode(ISD::XOR, dl, OType, LHSLessThanRHS, ResultNegative), dl,
12515 ResultType, ResultType);
12516 }
12517}
12518
12520 SDValue &Overflow, SelectionDAG &DAG) const {
// Expands a multiply-with-overflow node. SMULO selects the signed form; any
// other opcode is handled as unsigned. On success Result holds the low half
// of the product and Overflow the overflow flag, and true is returned. Returns
// false only for vector types when none of the multiply forms checked below
// is available.
12521 SDLoc dl(Node);
12522 EVT VT = Node->getValueType(0);
12523 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
12524 SDValue LHS = Node->getOperand(0);
12525 SDValue RHS = Node->getOperand(1);
12526 bool isSigned = Node->getOpcode() == ISD::SMULO;
12527
12528 // For power-of-two multiplications we can use a simpler shift expansion.
12529 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
12530 const APInt &C = RHSC->getAPIntValue();
12531 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
12532 if (C.isPowerOf2()) {
12533 // smulo(x, signed_min) is same as umulo(x, signed_min).
12534 bool UseArithShift = isSigned && !C.isMinSignedValue();
12535 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
12536 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
12537 Overflow = DAG.getSetCC(dl, SetCCVT,
12538 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
12539 dl, VT, Result, ShiftAmt),
12540 LHS, ISD::SETNE);
12541 return true;
12542 }
12543 }
12544
12545 SDValue BottomHalf;
12546 SDValue TopHalf;
12547 EVT WideVT = VT.widenIntegerElementType(*DAG.getContext());
12548
// As used below, Ops[isSigned][0] is a node yielding both halves of the
// product, Ops[isSigned][1] yields only the top half, and Ops[isSigned][2] is
// the unary node that widens an operand to WideVT.
12549 static const unsigned Ops[2][3] =
12552 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
12553 BottomHalf = DAG.getNode(Ops[isSigned][0], dl, DAG.getVTList(VT, VT), LHS,
12554 RHS);
12555 TopHalf = BottomHalf.getValue(1);
12556 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
12557 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
12558 TopHalf = DAG.getNode(Ops[isSigned][1], dl, VT, LHS, RHS);
12559 } else if (isTypeLegal(WideVT)) {
12560 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
12561 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
12562 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
12563 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
12564 SDValue ShiftAmt =
12565 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
12566 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
12567 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
12568 } else {
12569 if (VT.isVector())
12570 return false;
12571
12572 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
12573 }
12574
// Signed: overflow unless the top half equals the sign extension of the
// bottom half. Unsigned: overflow unless the top half is zero.
12575 Result = BottomHalf;
12576 if (isSigned) {
12577 SDValue ShiftAmt = DAG.getShiftAmountConstant(
12578 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
12579 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
12580 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
12581 } else {
12582 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
12583 DAG.getConstant(0, dl, VT), ISD::SETNE);
12584 }
12585
12586 // Truncate the result if SetCC returns a larger type than needed.
12587 EVT RType = Node->getValueType(1);
12588 if (RType.bitsLT(Overflow.getValueType()))
12589 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
12590
12591 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
12592 "Unexpected result type for S/UMULO legalization");
12593 return true;
12594}
12595
// Expands a vector reduction node over operand 0. For power-of-two vector
// types the vector is split in half and the halves are combined with the base
// opcode for as long as that opcode is legal or custom on the half type
// (optionally via a widened type for the opcodes flagged below). Whatever
// remains is reduced by extracting the elements and combining them one by one.
12597 SDLoc dl(Node);
12598 ISD::NodeType BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12599 SDValue Op = Node->getOperand(0);
12600 SDNodeFlags Flags = Node->getFlags();
12601 EVT VT = Op.getValueType();
12602
12603 // Try to use a shuffle reduction for power of two vectors.
12604 if (VT.isPow2VectorType()) {
12605 // See if the reduction opcode is safe to use with widened types.
12606 bool WidenSrc = false;
12607 switch (Node->getOpcode()) {
12610 case ISD::VECREDUCE_ADD:
12611 case ISD::VECREDUCE_MUL:
12612 case ISD::VECREDUCE_AND:
12613 case ISD::VECREDUCE_OR:
12614 case ISD::VECREDUCE_XOR:
12619 WidenSrc = VT.isFixedLengthVector();
12620 break;
12621 }
12622
12624 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
12625 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT)) {
12626 if (WidenSrc && Op.getOpcode() != ISD::BUILD_VECTOR) {
12627 // Attempt to widen the source vectors to a legal op.
12628 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), HalfVT);
12629 if (WideVT.isVector() &&
12630 WideVT.getScalarType() == HalfVT.getScalarType() &&
12631 WideVT.getVectorNumElements() >= HalfVT.getVectorNumElements() &&
12632 isOperationLegalOrCustom(BaseOpcode, WideVT)) {
// Place each half in the low lanes of a poison WideVT vector, combine them
// there, and take the HalfVT subvector back out.
12633 SDValue Lo, Hi;
12634 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12635 Lo = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Lo, 0);
12636 Hi = DAG.getInsertSubvector(dl, DAG.getPOISON(WideVT), Hi, 0);
12637 Op = DAG.getNode(BaseOpcode, dl, WideVT, Lo, Hi, Flags);
12638 Op = DAG.getExtractSubvector(dl, HalfVT, Op, 0);
12639 VT = HalfVT;
12640 continue;
12641 }
12642 }
12643 break;
12644 }
12645
12646 SDValue Lo, Hi;
12647 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
12648 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Flags);
12649 VT = HalfVT;
12650
12651 // Stop if splitting is enough to make the reduction legal.
12652 if (isOperationLegalOrCustom(Node->getOpcode(), HalfVT))
12653 return DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Op,
12654 Flags);
12655 }
12656 }
12657
12658 if (VT.isScalableVector())
12660 "Expanding reductions for scalable vectors is undefined.");
12661
// Scalar fallback: extract every remaining element and fold them left to
// right with the base opcode.
12662 EVT EltVT = VT.getVectorElementType();
12663 unsigned NumElts = VT.getVectorNumElements();
12664
12666 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
12667
12668 SDValue Res = Ops[0];
12669 for (unsigned i = 1; i < NumElts; i++)
12670 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12671
12672 // Result type may be wider than element type.
12673 if (EltVT != Node->getValueType(0))
12674 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
12675 return Res;
12676}
12677
// Expands a reduction that takes a start value: operand 0 is the accumulator
// and operand 1 is the vector. The vector elements are extracted and folded
// into the accumulator in index order using the base opcode of the reduction,
// carrying over the flags of the node.
12679 SDLoc dl(Node);
12680 SDValue AccOp = Node->getOperand(0);
12681 SDValue VecOp = Node->getOperand(1);
12682 SDNodeFlags Flags = Node->getFlags();
12683
12684 EVT VT = VecOp.getValueType();
12685 EVT EltVT = VT.getVectorElementType();
12686
12687 if (VT.isScalableVector())
12689 "Expanding reductions for scalable vectors is undefined.");
12690
12691 unsigned NumElts = VT.getVectorNumElements();
12692
12694 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
12695
12696 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
12697
// Start from the accumulator and apply the base opcode once per element.
12698 SDValue Res = AccOp;
12699 for (unsigned i = 0; i < NumElts; i++)
12700 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
12701
12702 return Res;
12703}
12704
12706 SelectionDAG &DAG) const {
// Expands a remainder node. SREM selects the signed form; any other opcode is
// handled as unsigned. Uses the second result of a combined divide/remainder
// node when that is legal or custom, otherwise computes X - (X / Y) * Y when
// the plain division is legal or custom. Returns true with Result set on
// success and false when neither form is available.
12707 EVT VT = Node->getValueType(0);
12708 SDLoc dl(Node);
12709 bool isSigned = Node->getOpcode() == ISD::SREM;
12710 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
12711 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
12712 SDValue Dividend = Node->getOperand(0);
12713 SDValue Divisor = Node->getOperand(1);
12714 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
12715 SDVTList VTs = DAG.getVTList(VT, VT);
12716 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
12717 return true;
12718 }
12719 if (isOperationLegalOrCustom(DivOpc, VT)) {
12720 // X % Y -> X-X/Y*Y
12721 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
12722 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
12723 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
12724 return true;
12725 }
12726 return false;
12727}
12728
12730 SelectionDAG &DAG) const {
// Expands a saturating floating-point to integer conversion. FP_TO_SINT_SAT
// selects the signed form; any other opcode is handled as unsigned. Operand 0
// is the floating-point source and operand 1 is a VTSDNode naming the type
// whose range the result saturates to. Inputs below or above that range yield
// its minimum or maximum, and a NaN input yields 0. Two strategies are used:
// clamp in floating point and then convert, or convert and then select.
12731 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
12732 SDLoc dl(SDValue(Node, 0));
12733 SDValue Src = Node->getOperand(0);
12734
12735 // DstVT is the result type, while SatVT is the size to which we saturate
12736 EVT SrcVT = Src.getValueType();
12737 EVT DstVT = Node->getValueType(0);
12738
12739 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
12740 unsigned SatWidth = SatVT.getScalarSizeInBits();
12741 unsigned DstWidth = DstVT.getScalarSizeInBits();
12742 assert(SatWidth <= DstWidth &&
12743 "Expected saturation width smaller than result width");
12744
12745 // Determine minimum and maximum integer values and their corresponding
12746 // floating-point values.
12747 APInt MinInt, MaxInt;
12748 if (IsSigned) {
12749 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
12750 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
12751 } else {
12752 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
12753 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
12754 }
12755
12756 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
12757 // libcall emission cannot handle this. Large result types will fail.
12758 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
12759 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
12760 SrcVT = Src.getValueType();
12761 }
12762
12763 const fltSemantics &Sem = SrcVT.getFltSemantics();
12764 APFloat MinFloat(Sem);
12765 APFloat MaxFloat(Sem);
12766
// Convert the integer bounds to the source format, rounding toward zero, and
// record whether both conversions were exact.
12767 APFloat::opStatus MinStatus =
12768 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
12769 APFloat::opStatus MaxStatus =
12770 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
12771 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
12772 !(MaxStatus & APFloat::opStatus::opInexact);
12773
12774 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
12775 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
12776
12777 // If the integer bounds are exactly representable as floats and min/max are
12778 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
12779 // of comparisons and selects.
// EmitMinMax returns an empty SDValue when either opcode is not legal or
// custom for SrcVT, so the caller can try the next pair.
12780 auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
12781 bool MayPropagateNaN) {
12782 bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
12783 isOperationLegalOrCustom(MaxOpcode, SrcVT);
12784 if (!MinMaxLegal)
12785 return SDValue();
12786
12787 SDValue Clamped = Src;
12788
12789 // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
12790 // then the result is MinFloat.
12791 Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
12792 // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
12793 Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
12794 // Convert clamped value to integer.
12795 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
12796 dl, DstVT, Clamped);
12797
12798 // If !MayPropagateNaN and the conversion is unsigned we're done,
12799 // because we mapped NaN to MinFloat, which will cast to zero.
12800 if (!MayPropagateNaN && !IsSigned)
12801 return FpToInt;
12802
12803 // Otherwise, select 0 if Src is NaN.
12804 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12805 EVT SetCCVT =
12806 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12807 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12808 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
12809 };
12810 if (AreExactFloatBounds) {
12811 if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
12812 /*MayPropagateNaN=*/false))
12813 return Res;
12814 // These may propagate NaN for sNaN operands.
12815 if (SDValue Res =
12816 EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
12817 return Res;
12818 // These always propagate NaN.
12819 if (SDValue Res =
12820 EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
12821 return Res;
12822 }
12823
// Fallback: convert first, then replace out-of-range and NaN results with
// selects.
12824 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
12825 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
12826
12827 // Result of direct conversion. The assumption here is that the operation is
12828 // non-trapping and it's fine to apply it to an out-of-range value if we
12829 // select it away later.
12830 SDValue FpToInt =
12831 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
12832
12833 SDValue Select = FpToInt;
12834
12835 EVT SetCCVT =
12836 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
12837
12838 // If Src ULT MinFloat, select MinInt. In particular, this also selects
12839 // MinInt if Src is NaN.
12840 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
12841 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
12842 // If Src OGT MaxFloat, select MaxInt.
12843 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
12844 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
12845
12846 // In the unsigned case we are done, because we mapped NaN to MinInt, which
12847 // is already zero.
12848 if (!IsSigned)
12849 return Select;
12850
12851 // Otherwise, select 0 if Src is NaN.
12852 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
12853 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
12854 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
12855}
12856
12858 const SDLoc &dl,
12859 SelectionDAG &DAG) const {
// Converts Op to ResultVT so that an inexact conversion always ends on a
// value whose integer bit pattern is odd. Op is returned unchanged when its
// scalar type already equals that of ResultVT. The narrowed value is kept
// when it is exact, NaN, or already odd; otherwise its bit pattern is stepped
// by one towards the value on the other side of Op.
12860 EVT OperandVT = Op.getValueType();
12861 if (OperandVT.getScalarType() == ResultVT.getScalarType())
12862 return Op;
12863 EVT ResultIntVT = ResultVT.changeTypeToInteger();
12864 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12865 // can induce double-rounding which may alter the results. We can
12866 // correct for this using a trick explained in: Boldo, Sylvie, and
12867 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12868 // World Congress. 2005.
12869 SDValue Narrow = DAG.getFPExtendOrRound(Op, dl, ResultVT);
12870 SDValue NarrowAsWide = DAG.getFPExtendOrRound(Narrow, dl, OperandVT);
12871
12872 // We can keep the narrow value as-is if narrowing was exact (no
12873 // rounding error), the wide value was NaN (the narrow value is also
12874 // NaN and should be preserved) or if we rounded to the odd value.
12875 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, Narrow);
12876 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
12877 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
12878 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
12879 EVT ResultIntVTCCVT = getSetCCResultType(
12880 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
12881 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
12882 // The result is already odd so we don't need to do anything.
12883 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
12884
12885 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12886 Op.getValueType());
12887 // We keep results which are exact, odd or NaN.
// SETUEQ is true both when the round trip reproduces Op exactly and when the
// comparison is unordered (NaN).
12888 SDValue KeepNarrow =
12889 DAG.getSetCC(dl, WideSetCCVT, Op, NarrowAsWide, ISD::SETUEQ);
12890 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
12891 // We morally performed a round-down if AbsNarrow is smaller than
12892 // AbsWide.
12893 SDValue AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
12894 SDValue AbsNarrowAsWide = DAG.getNode(ISD::FABS, dl, OperandVT, NarrowAsWide);
12895 SDValue NarrowIsRd =
12896 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
12897 // If the narrow value is odd or exact, pick it.
12898 // Otherwise, narrow is even and corresponds to either the rounded-up
12899 // or rounded-down value. If narrow is the rounded-down value, we want
12900 // the rounded-up value as it will be odd.
// Adding +1 or -1 to the integer bit pattern moves to the neighbouring
// encoding: +1 when the magnitude was rounded down, -1 otherwise.
12901 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
12902 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
12903 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
12904 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
12905}
12906
// Expands an FP_ROUND node whose result scalar type is bf16 into integer
// arithmetic on the f32 bit pattern. For any other result type an empty
// SDValue is returned and no nodes are built.
// NOTE(review): the line carrying this function's signature is missing from
// this listing -- presumably TargetLowering::expandFP_ROUND(SDNode *Node,
// SelectionDAG &DAG) const; confirm against the upstream file.
12908 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
12909 SDValue Op = Node->getOperand(0);
12910 EVT VT = Node->getValueType(0);
12911 SDLoc dl(Node);
12912 if (VT.getScalarType() == MVT::bf16) {
// When the constant in operand 1 is 1, emit FP_TO_BF16 directly and skip the
// manual rounding sequence below.
// NOTE(review): presumably operand 1 is FP_ROUND's "no precision is lost"
// flag -- confirm against the ISD::FP_ROUND documentation.
12913 if (Node->getConstantOperandVal(1) == 1) {
12914 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
12915 }
12916 EVT OperandVT = Op.getValueType();
// The input is compared with itself using the unordered predicate; the
// result selects the NaN path further down.
12917 SDValue IsNaN = DAG.getSetCC(
12918 dl,
12919 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
12920 Op, Op, ISD::SETUO);
12921
12922 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
12923 // can induce double-rounding which may alter the results. We can
12924 // correct for this using a trick explained in: Boldo, Sylvie, and
12925 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
12926 // World Congress. 2005.
12927 EVT F32 = VT.changeElementType(*DAG.getContext(), MVT::f32);
12928 EVT I32 = F32.changeTypeToInteger();
12929 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
12930 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
12931
12932 // Conversions should set NaN's quiet bit. This also prevents NaNs from
12933 // turning into infinities.
12934 SDValue NaN =
12935 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
12936
12937 // Factor in the contribution of the low 16 bits.
// Adding 0x7fff plus the bit that becomes the result's least significant bit
// rounds to nearest, with ties going to the even result, once the low 16 bits
// are shifted out below.
12938 SDValue One = DAG.getConstant(1, dl, I32);
12939 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
12940 DAG.getShiftAmountConstant(16, I32, dl));
12941 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
12942 SDValue RoundingBias =
12943 DAG.getNode(ISD::ADD, dl, I32, Lsb, DAG.getConstant(0x7fff, dl, I32));
12944 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
12945
12946 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
12947 // 0x80000000.
12948 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
12949
12950 // Now that we have rounded, shift the bits into position.
12951 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
12952 DAG.getShiftAmountConstant(16, I32, dl));
// Keep only the low 16 bits of each lane and reinterpret them as the bf16
// result type.
12953 EVT I16 = I32.changeElementType(*DAG.getContext(), MVT::i16);
12954 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
12955 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
12956 }
// Not a bf16 result: nothing was expanded.
12957 return SDValue();
12958}
12959
12961 SelectionDAG &DAG) const {
// Expands VECTOR_SPLICE_LEFT / VECTOR_SPLICE_RIGHT through a stack temporary:
// both input vectors are stored back to back and the result is loaded from a
// byte offset derived from operand 2.
// NOTE(review): the first line of this function's signature is missing from
// this listing -- presumably TargetLowering::expandVectorSplice(SDNode *Node,
// ...); confirm against the upstream file.
12962 assert((Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT ||
12963 Node->getOpcode() == ISD::VECTOR_SPLICE_RIGHT) &&
12964 "Unexpected opcode!");
12965 assert((Node->getValueType(0).isScalableVector() ||
12966 !isa<ConstantSDNode>(Node->getOperand(2))) &&
12967 "Fixed length vector types with constant offsets expected to use "
12968 "SHUFFLE_VECTOR!");
12969
12970 EVT VT = Node->getValueType(0);
12971 SDValue V1 = Node->getOperand(0);
12972 SDValue V2 = Node->getOperand(1);
12973 SDValue Offset = Node->getOperand(2);
12974 SDLoc DL(Node);
12975
12976 // Expand through memory thusly:
12977 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
12978 // Store V1, Ptr
12979 // Store V2, Ptr + sizeof(V1)
12980 // if (VECTOR_SPLICE_LEFT)
12981 // Ptr = Ptr + (Offset * sizeof(VT.Elt))
12982 // else
12983 // Ptr = Ptr + sizeof(V1) - (Offset * sizeof(VT.Elt))
12984 // Res = Load Ptr
12985
12986 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
12987
// NOTE(review): the line that begins the MemVT definition is missing from
// this listing; only its continuation (element count * 2) is visible.
12989 VT.getVectorElementCount() * 2);
12990 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12991 EVT PtrVT = StackPtr.getValueType();
12992 auto &MF = DAG.getMachineFunction();
12993 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12994 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12995
12996 // Store the lo part of CONCAT_VECTORS(V1, V2)
12997 SDValue StoreV1 =
12998 DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo, Alignment);
12999 // Store the hi part of CONCAT_VECTORS(V1, V2)
// NOTE(review): this store passes the same PtrInfo as the first store even
// though its address is StackPtr2 (StackPtr + VTBytes) -- confirm that the
// un-offset pointer info is intended.
13000 SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize());
13001 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VTBytes);
13002 SDValue StoreV2 =
13003 DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo, Alignment);
13004
13005 // NOTE: TrailingBytes must be clamped so as not to read outside of V1:V2.
13006 SDValue EltByteSize =
13007 DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize());
13008 Offset = DAG.getZExtOrTrunc(Offset, DL, PtrVT);
13009 SDValue TrailingBytes = DAG.getNode(ISD::MUL, DL, PtrVT, Offset, EltByteSize);
13010
// Clamp the byte offset to the size of one input vector.
13011 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VTBytes);
13012
// LEFT: advance from the start of V1. RIGHT: step back from the start of V2.
13013 if (Node->getOpcode() == ISD::VECTOR_SPLICE_LEFT)
13014 StackPtr = DAG.getMemBasePlusOffset(StackPtr, TrailingBytes, DL);
13015 else
13016 StackPtr = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
13017
13018 // Load the spliced result
// The load is chained on StoreV2, which itself is chained on StoreV1, so it
// is ordered after both stores.
// NOTE(review): the line with the remaining getLoad arguments is missing from
// this listing.
13019 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
13021}
13022
13024 SelectionDAG &DAG) const {
// Expands a vector-compress node (operands: vector, mask, passthru) for
// fixed-length vectors by storing elements one at a time into a stack slot and
// advancing the output position only for lanes whose mask bit is set; the
// compressed vector is then loaded back from the slot.
// NOTE(review): the first line of this function's signature is missing from
// this listing -- presumably TargetLowering::expandVECTOR_COMPRESS(SDNode
// *Node, ...); confirm against the upstream file.
13025 SDLoc DL(Node);
13026 SDValue Vec = Node->getOperand(0);
13027 SDValue Mask = Node->getOperand(1);
13028 SDValue Passthru = Node->getOperand(2);
13029
13030 EVT VecVT = Vec.getValueType();
13031 EVT ScalarVT = VecVT.getScalarType();
13032 EVT MaskVT = Mask.getValueType();
13033 EVT MaskScalarVT = MaskVT.getScalarType();
13034
13035 // Needs to be handled by targets that have scalable vector types.
13036 if (VecVT.isScalableVector())
13037 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
13038
13039 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
13040 SDValue StackPtr = DAG.CreateStackTemporary(VecVT.getStoreSize(), Alignment);
13041 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// NOTE(review): the initializer of PtrInfo is on a line missing from this
// listing.
13042 MachinePointerInfo PtrInfo =
13044
13045 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
13046 SDValue Chain = DAG.getEntryNode();
13047 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
13048
13049 bool HasPassthru = !Passthru.isUndef();
13050
13051 // If we have a passthru vector, store it on the stack, overwrite the matching
13052 // positions and then re-write the last element that was potentially
13053 // overwritten even though mask[i] = false.
13054 if (HasPassthru)
13055 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo, Alignment);
13056
13057 SDValue LastWriteVal;
13058 APInt PassthruSplatVal;
13059 bool IsSplatPassthru =
13060 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
13061
13062 if (IsSplatPassthru) {
13063 // As we do not know which position we wrote to last, we cannot simply
13064 // access that index from the passthru vector. So we first check if passthru
13065 // is a splat vector, to use any element ...
13066 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
13067 } else if (HasPassthru) {
13068 // ... if it is not a splat vector, we need to get the passthru value at
13069 // position = popcount(mask) and re-load it from the stack before it is
13070 // overwritten in the loop below.
// NOTE(review): the opcode/argument lines of the two getNode calls below are
// missing from this listing; only the result types and operands are visible.
13071 EVT PopcountVT = ScalarVT.changeTypeToInteger();
13072 SDValue Popcount = DAG.getNode(
13074 MaskVT.changeVectorElementType(*DAG.getContext(), MVT::i1), Mask);
13075 Popcount = DAG.getNode(
13077 MaskVT.changeVectorElementType(*DAG.getContext(), PopcountVT),
13078 Popcount);
13079 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, PopcountVT, Popcount);
13080 SDValue LastElmtPtr =
13081 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
13082 LastWriteVal = DAG.getLoad(
13083 ScalarVT, DL, Chain, LastElmtPtr,
13085 Chain = LastWriteVal.getValue(1);
13086 }
13087
// Store every input element at the current output position. The position
// only advances for selected lanes, so an unselected element is overwritten
// by the next store.
13088 unsigned NumElms = VecVT.getVectorNumElements();
13089 for (unsigned I = 0; I < NumElms; I++) {
13090 SDValue ValI = DAG.getExtractVectorElt(DL, ScalarVT, Vec, I);
13091 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13092 Chain = DAG.getStore(
13093 Chain, DL, ValI, OutPtr,
13095
13096 // Get the mask value and add it to the current output position. This
13097 // either increments by 1 if MaskI is true or adds 0 otherwise.
13098 // Freeze in case we have poison/undef mask entries.
13099 SDValue MaskI = DAG.getExtractVectorElt(DL, MaskScalarVT, Mask, I);
13100 MaskI = DAG.getFreeze(MaskI);
13101 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
13102 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
13103 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
13104
// After the final element, repair the slot at the (clamped) output position.
13105 if (HasPassthru && I == NumElms - 1) {
13106 SDValue EndOfVector =
13107 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
13108 SDValue AllLanesSelected =
13109 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
13110 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
13111 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
13112
13113 // Re-write the last ValI if all lanes were selected. Otherwise,
13114 // overwrite the last write with the passthru value.
13115 LastWriteVal = DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI,
13116 LastWriteVal, SDNodeFlags::Unpredictable);
13117 Chain = DAG.getStore(
13118 Chain, DL, LastWriteVal, OutPtr,
13120 }
13121 }
13122
// Read the whole compressed vector back from the stack slot.
13123 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo, Alignment);
13124}
13125
// Expands a CTTZ_ELTS / CTTZ_ELTS_ZERO_POISON node. With a legal step vector
// the result is VL - max(select(Mask, VL - step, 0)); without one the mask is
// split in half and the two partial results are recombined.
// NOTE(review): the line carrying this function's signature is missing from
// this listing -- presumably TargetLowering::expandCttzElts(SDNode *Node,
// SelectionDAG &DAG) const; confirm against the upstream file.
13127 SDLoc DL(Node);
13128 EVT VT = Node->getValueType(0);
13129
13130 bool ZeroIsPoison = Node->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON;
13131 auto [Mask, StepVec] =
13132 getLegalMaskAndStepVector(Node->getOperand(0), ZeroIsPoison, DL, DAG);
13133
13134 // No legal step vector: split mask in half and recombine results.
13135 // ResLo uses the non-poison CTTZ_ELTS so its result is well-defined
13136 // (== LoNumElts when no active lane), allowing the SETNE comparison.
13137 // Result: (ResLo != LoNumElts) ? ResLo : (LoNumElts + ResHi)
13138 if (!StepVec) {
13139 EVT ResVT = Node->getValueType(0);
13140 auto [MaskLo, MaskHi] = DAG.SplitVector(Node->getOperand(0), DL);
13141 SDValue LoNumElts = DAG.getElementCount(
13142 DL, ResVT, MaskLo.getValueType().getVectorElementCount());
// The high half keeps the original opcode, so it stays the poison variant
// when the node being expanded is one.
13143 SDValue ResLo = DAG.getNode(ISD::CTTZ_ELTS, DL, ResVT, MaskLo);
13144 SDValue ResHi = DAG.getNode(Node->getOpcode(), DL, ResVT, MaskHi);
13145 SDValue ResLoNotNumElts = DAG.getSetCC(
13146 DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResVT),
13147 ResLo, LoNumElts, ISD::SETNE);
13148 // Per LangRef, ResVT must be wide enough to hold the total element count,
13149 // so the sum cannot wrap as an unsigned add. NSW is not guaranteed since
13150 // the count is only required to fit unsigned.
// NOTE(review): the line with the flags argument of this ADD is missing from
// this listing.
13151 SDValue Sum = DAG.getNode(ISD::ADD, DL, ResVT, LoNumElts, ResHi,
13153 return DAG.getSelect(DL, ResVT, ResLoNotNumElts, ResLo, Sum);
13154 }
13155
13156 EVT StepVecVT = StepVec.getValueType();
13157 EVT StepVT = StepVecVT.getVectorElementType();
13158
13159 // Promote the scalar result type early to avoid redundant zexts.
// NOTE(review): the condition guarding the next statement is on a line
// missing from this listing.
13161 StepVT = getTypeToTransformTo(*DAG.getContext(), StepVT);
13162
// Build VL - step per lane, zero out lanes whose mask bit is clear, and
// subtract the reduced value from VL.
// NOTE(review): the line that defines Max (the reduction over Select) is
// missing from this listing; only its trailing arguments are visible.
13163 SDValue VL =
13164 DAG.getElementCount(DL, StepVT, StepVecVT.getVectorElementCount());
13165 SDValue SplatVL = DAG.getSplat(StepVecVT, DL, VL);
13166 StepVec = DAG.getNode(ISD::SUB, DL, StepVecVT, SplatVL, StepVec);
13167 SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
13168 SDValue Select = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
13170 StepVecVT.getVectorElementType(), Select);
13171 SDValue Sub = DAG.getNode(ISD::SUB, DL, StepVT, VL,
13172 DAG.getZExtOrTrunc(Max, DL, StepVT));
13173
// Convert to the node's result type.
13174 return DAG.getZExtOrTrunc(Sub, DL, VT);
13175}
13176
13178 SelectionDAG &DAG) const {
// Expands a partial-reduction multiply-accumulate node (operands:
// accumulator, multiplicand, multiplier): the product is computed in the
// extended type, sliced into accumulator-sized subvectors, and the slices are
// added to the accumulator.
// NOTE(review): the first line of this function's signature is missing from
// this listing -- presumably TargetLowering::expandPartialReduceMLA(SDNode *N,
// ...); confirm against the upstream file.
13179 SDLoc DL(N);
13180 SDValue Acc = N->getOperand(0);
13181 SDValue MulLHS = N->getOperand(1);
13182 SDValue MulRHS = N->getOperand(2);
13183 EVT AccVT = Acc.getValueType();
13184 EVT MulOpVT = MulLHS.getValueType();
13185
// NOTE(review): the line that starts the ExtMulOpVT initializer is missing
// from this listing; presumably it pairs the accumulator's element type with
// the multiply operand's element count -- confirm.
13186 EVT ExtMulOpVT =
13188 MulOpVT.getVectorElementCount());
13189
// Pick the extension matching the node's opcode.
// NOTE(review): the three case labels of this switch are on lines missing
// from this listing.
13190 unsigned ExtOpcLHS, ExtOpcRHS;
13191 switch (N->getOpcode()) {
13192 default:
13193 llvm_unreachable("Unexpected opcode");
13195 ExtOpcLHS = ExtOpcRHS = ISD::ZERO_EXTEND;
13196 break;
13198 ExtOpcLHS = ExtOpcRHS = ISD::SIGN_EXTEND;
13199 break;
13201 ExtOpcLHS = ExtOpcRHS = ISD::FP_EXTEND;
13202 break;
13203 }
13204
13205 if (ExtMulOpVT != MulOpVT) {
13206 MulLHS = DAG.getNode(ExtOpcLHS, DL, ExtMulOpVT, MulLHS);
13207 MulRHS = DAG.getNode(ExtOpcRHS, DL, ExtMulOpVT, MulRHS);
13208 }
// Skip the multiply when the right-hand side is one (or a splat of one).
13209 SDValue Input = MulLHS;
13210 if (N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA) {
13211 if (!llvm::isOneOrOneSplatFP(MulRHS))
13212 Input = DAG.getNode(ISD::FMUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13213 } else if (!llvm::isOneOrOneSplat(MulRHS)) {
13214 Input = DAG.getNode(ISD::MUL, DL, ExtMulOpVT, MulLHS, MulRHS);
13215 }
13216
// Stride is the accumulator's minimum element count; ScaleFactor is how many
// accumulator-sized slices the input contains.
13217 unsigned Stride = AccVT.getVectorMinNumElements();
13218 unsigned ScaleFactor = MulOpVT.getVectorMinNumElements() / Stride;
13219
13220 // Collect all of the subvectors
13221 std::deque<SDValue> Subvectors = {Acc};
13222 for (unsigned I = 0; I < ScaleFactor; I++)
13223 Subvectors.push_back(DAG.getExtractSubvector(DL, AccVT, Input, I * Stride));
13224
13225 unsigned FlatNode =
13226 N->getOpcode() == ISD::PARTIAL_REDUCE_FMLA ? ISD::FADD : ISD::ADD;
13227
13228 // Flatten the subvector tree
// Each iteration replaces the two front entries with their sum appended at
// the back, so the queue shrinks by one per iteration.
13229 while (Subvectors.size() > 1) {
13230 Subvectors.push_back(
13231 DAG.getNode(FlatNode, DL, AccVT, {Subvectors[0], Subvectors[1]}));
13232 Subvectors.pop_front();
13233 Subvectors.pop_front();
13234 }
13235
13236 assert(Subvectors.size() == 1 &&
13237 "There should only be one subvector after tree flattening");
13238
13239 return Subvectors[0];
13240}
13241
13242/// Given a store node \p StoreNode, return true if it is safe to fold that node
13243/// into \p FPNode, which expands to a library call with output pointers.
/// Folding is rejected when \p FPNode or a CALLSEQ_START is reachable through
/// the store's other operands, or when the search limit is hit.
// NOTE(review): the first line of this function's signature is missing from
// this listing -- presumably a static helper taking the StoreSDNode; confirm
// against the upstream file.
13245 SDNode *FPNode) {
// NOTE(review): the declarations of Visited and Worklist (and of MaxSteps, if
// it is local) are on lines missing from this listing.
13247 SmallVector<const SDNode *, 8> DeferredNodes;
13249
13250 // Skip FPNode use by StoreNode (that's the use we want to fold into FPNode).
13251 for (SDValue Op : StoreNode->ops())
13252 if (Op.getNode() != FPNode)
13253 Worklist.push_back(Op.getNode());
13254
// Walk the operand graph from the store's remaining operands.
13256 while (!Worklist.empty()) {
13257 const SDNode *Node = Worklist.pop_back_val();
13258 auto [_, Inserted] = Visited.insert(Node);
13259 if (!Inserted)
13260 continue;
13261
// Give up and answer "not safe" once the step budget is exhausted.
13262 if (MaxSteps > 0 && Visited.size() >= MaxSteps)
13263 return false;
13264
13265 // Reached the FPNode (would result in a cycle).
13266 // OR Reached CALLSEQ_START (would result in nested call sequences).
13267 if (Node == FPNode || Node->getOpcode() == ISD::CALLSEQ_START)
13268 return false;
13269
13270 if (Node->getOpcode() == ISD::CALLSEQ_END) {
13271 // Defer looking into call sequences (so we can check we're outside one).
13272 // We still need to look through these for the predecessor check.
13273 DeferredNodes.push_back(Node);
13274 continue;
13275 }
13276
13277 for (SDValue Op : Node->ops())
13278 Worklist.push_back(Op.getNode());
13279 }
13280
13281 // True if we're outside a call sequence and don't have the FPNode as a
13282 // predecessor. No cycles or nested call sequences possible.
// The deferred CALLSEQ_END nodes seed this second search, which reuses the
// Visited set built above.
13283 return !SDNode::hasPredecessorHelper(FPNode, Visited, DeferredNodes,
13284 MaxSteps);
13285}
13286
13288 SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node,
13290 std::optional<unsigned> CallRetResNo) const {
// Expands a multi-result node into a call to library function LC that
// returns results through output pointers. The result numbered CallRetResNo
// (if any) is the call's return value; every other result is loaded back from
// its output pointer. Returns false, without building anything, when LC is
// unknown or unsupported; otherwise appends one value per node result to
// Results and returns true.
// NOTE(review): the first signature line and the line declaring the Results
// parameter are missing from this listing -- presumably
// TargetLowering::expandMultipleResultFPLibCall; confirm upstream.
13291 if (LC == RTLIB::UNKNOWN_LIBCALL)
13292 return false;
13293
13294 RTLIB::LibcallImpl LibcallImpl = getLibcallImpl(LC);
13295 if (LibcallImpl == RTLIB::Unsupported)
13296 return false;
13297
13298 LLVMContext &Ctx = *DAG.getContext();
13299 EVT VT = Node->getValueType(0);
13300 unsigned NumResults = Node->getNumValues();
13301
13302 // Find users of the node that store the results (and share input chains). The
13303 // destination pointers can be used instead of creating stack allocations.
13304 SDValue StoresInChain;
13305 SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
13306 for (SDNode *User : Node->users()) {
// NOTE(review): the condition guarding this `continue` is on a line missing
// from this listing; presumably it skips users that are not plain stores.
13308 continue;
13309 auto *ST = cast<StoreSDNode>(User);
13310 SDValue StoreValue = ST->getValue();
13311 unsigned ResNo = StoreValue.getResNo();
13312 // Ensure the store corresponds to an output pointer.
13313 if (CallRetResNo == ResNo)
13314 continue;
13315 // Ensure the store is to the default address space and is neither atomic
// nor volatile.
13316 if (!ST->isSimple() || ST->getAddressSpace() != 0)
13317 continue;
13318 // Ensure all store chains are the same (so they don't alias).
13319 if (StoresInChain && ST->getChain() != StoresInChain)
13320 continue;
13321 // Ensure the store is properly aligned.
13322 Type *StoreType = StoreValue.getValueType().getTypeForEVT(Ctx);
13323 if (ST->getAlign() <
13324 DAG.getDataLayout().getABITypeAlign(StoreType->getScalarType()))
13325 continue;
13326 // Avoid:
13327 // 1. Creating cyclic dependencies.
13328 // 2. Expanding the node to a call within a call sequence.
// NOTE(review): the condition guarding this `continue` is on a line missing
// from this listing.
13330 continue;
13331 ResultStores[ResNo] = ST;
13332 StoresInChain = ST->getChain();
13333 }
13334
13335 ArgListTy Args;
13336
13337 // Pass the arguments.
13338 for (const SDValue &Op : Node->op_values()) {
13339 EVT ArgVT = Op.getValueType();
13340 Type *ArgTy = ArgVT.getTypeForEVT(Ctx);
13341 Args.emplace_back(Op, ArgTy);
13342 }
13343
13344 // Pass the output pointers.
// A result with a foldable store reuses that store's base pointer; every
// other result gets a fresh stack temporary.
// NOTE(review): the line defining PointerTy is missing from this listing.
13345 SmallVector<SDValue, 2> ResultPtrs(NumResults);
13347 for (auto [ResNo, ST] : llvm::enumerate(ResultStores)) {
13348 if (ResNo == CallRetResNo)
13349 continue;
13350 EVT ResVT = Node->getValueType(ResNo);
13351 SDValue ResultPtr = ST ? ST->getBasePtr() : DAG.CreateStackTemporary(ResVT);
13352 ResultPtrs[ResNo] = ResultPtr;
13353 Args.emplace_back(ResultPtr, PointerTy);
13354 }
13355
13356 SDLoc DL(Node);
13357
// NOTE(review): the condition opening this block is on a line missing from
// this listing. The block appends an all-true mask argument.
13359 // Pass the vector mask (if required).
13360 EVT MaskVT = getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
13361 SDValue Mask = DAG.getBoolConstant(true, DL, MaskVT, VT);
13362 Args.emplace_back(Mask, MaskVT.getTypeForEVT(Ctx));
13363 }
13364
// The call returns void unless one node result is designated as the return
// value.
13365 Type *RetType = CallRetResNo.has_value()
13366 ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
13367 : Type::getVoidTy(Ctx);
// Chain the call on the folded stores' input chain when there is one,
// otherwise on the entry node.
13368 SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
13369 SDValue Callee =
13370 DAG.getExternalSymbol(LibcallImpl, getPointerTy(DAG.getDataLayout()));
// NOTE(review): the declaration of CLI is on a line missing from this
// listing.
13372 CLI.setDebugLoc(DL).setChain(InChain).setLibCallee(
13373 getLibcallImplCallingConv(LibcallImpl), RetType, Callee, std::move(Args));
13374
13375 auto [Call, CallChain] = LowerCallTo(CLI);
13376
13377 for (auto [ResNo, ResultPtr] : llvm::enumerate(ResultPtrs)) {
13378 if (ResNo == CallRetResNo) {
13379 Results.push_back(Call);
13380 continue;
13381 }
// NOTE(review): PtrInfo is passed to getLoad while still default-constructed;
// the assignments to it further down happen after the load was created and do
// not appear to affect LoadResult -- confirm whether the load was meant to
// carry the store's / stack slot's pointer info.
13382 MachinePointerInfo PtrInfo;
13383 SDValue LoadResult = DAG.getLoad(Node->getValueType(ResNo), DL, CallChain,
13384 ResultPtr, PtrInfo);
13385 SDValue OutChain = LoadResult.getValue(1);
13386
13387 if (StoreSDNode *ST = ResultStores[ResNo]) {
13388 // Replace store with the library call.
13389 DAG.ReplaceAllUsesOfValueWith(SDValue(ST, 0), OutChain);
13390 PtrInfo = ST->getPointerInfo();
13391 } else {
// NOTE(review): the line that begins this statement is missing from this
// listing; presumably it assigns a fixed-stack MachinePointerInfo to PtrInfo.
13393 DAG.getMachineFunction(),
13394 cast<FrameIndexSDNode>(ResultPtr)->getIndex());
13395 }
13396
13397 Results.push_back(LoadResult);
13398 }
13399
13400 return true;
13401}
13402
13404 SDValue &LHS, SDValue &RHS,
13405 SDValue &CC, SDValue Mask,
13406 SDValue EVL, bool &NeedInvert,
13407 const SDLoc &dl, SDValue &Chain,
13408 bool IsSignaling) const {
// Rewrites the comparison (LHS CC RHS) when the action for its condition code
// on the operand type requires it. Returns true if LHS/RHS/CC were changed and
// false if nothing had to be done. On a true return:
//  - NeedInvert tells the caller that the produced result must be inverted;
//  - if the comparison was expanded into a combined value, that value is left
//    in LHS while RHS and CC are reset to empty SDValues.
// Mask and EVL must either both be set (VP form) or both be unset.
// NOTE(review): the first signature line is missing from this listing --
// presumably TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
// ...); confirm against the upstream file.
13409 MVT OpVT = LHS.getSimpleValueType();
13410 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
13411 NeedInvert = false;
13412 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
13413 bool IsNonVP = !EVL;
13414 switch (getCondCodeAction(CCCode, OpVT)) {
13415 default:
13416 llvm_unreachable("Unknown condition code action!");
// NOTE(review): the case labels of this switch and the declaration of InvCC
// are on lines missing from this listing.
13418 // Nothing to do.
13419 break;
// First attempt: the same comparison with operands swapped.
13422 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
13423 std::swap(LHS, RHS);
13424 CC = DAG.getCondCode(InvCC);
13425 return true;
13426 }
13427 // Swapping operands didn't work. Try inverting the condition.
13428 bool NeedSwap = false;
13429 InvCC = getSetCCInverse(CCCode, OpVT);
13430 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
13431 // If inverting the condition is not enough, try swapping operands
13432 // on top of it.
13433 InvCC = ISD::getSetCCSwappedOperands(InvCC);
13434 NeedSwap = true;
13435 }
13436 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
13437 CC = DAG.getCondCode(InvCC);
13438 NeedInvert = true;
13439 if (NeedSwap)
13440 std::swap(LHS, RHS);
13441 return true;
13442 }
13443
13444 // Special case: expand i1 comparisons using logical operations.
13445 if (OpVT == MVT::i1) {
13446 SDValue Ret;
13447 switch (CCCode) {
13448 default:
13449 llvm_unreachable("Unknown integer setcc!");
13450 case ISD::SETEQ: // X == Y --> ~(X ^ Y)
13451 Ret = DAG.getNOT(dl, DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS),
13452 MVT::i1);
13453 break;
13454 case ISD::SETNE: // X != Y --> (X ^ Y)
13455 Ret = DAG.getNode(ISD::XOR, dl, MVT::i1, LHS, RHS);
13456 break;
13457 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13458 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13459 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, RHS,
13460 DAG.getNOT(dl, LHS, MVT::i1));
13461 break;
13462 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13463 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13464 Ret = DAG.getNode(ISD::AND, dl, MVT::i1, LHS,
13465 DAG.getNOT(dl, RHS, MVT::i1));
13466 break;
13467 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13468 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13469 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, RHS,
13470 DAG.getNOT(dl, LHS, MVT::i1));
13471 break;
13472 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13473 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13474 Ret = DAG.getNode(ISD::OR, dl, MVT::i1, LHS,
13475 DAG.getNOT(dl, RHS, MVT::i1));
13476 break;
13477 }
13478
13479 LHS = DAG.getZExtOrTrunc(Ret, dl, VT);
13480 RHS = SDValue();
13481 CC = SDValue();
13482 return true;
13483 }
13484
// General case: express the comparison as (setcc CC1) Opc (setcc CC2).
// NOTE(review): the declaration of CC1 and CC2 is on a line missing from this
// listing.
13486 unsigned Opc = 0;
13487 switch (CCCode) {
13488 default:
13489 llvm_unreachable("Don't know how to expand this condition!");
13490 case ISD::SETUO:
13491 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
13492 CC1 = ISD::SETUNE;
13493 CC2 = ISD::SETUNE;
13494 Opc = ISD::OR;
13495 break;
13496 }
// NOTE(review): the message below says "SETUE" although this case handles
// SETUO; the string is runtime text and is left unchanged here. The first
// line of each of the next two asserts is missing from this listing.
13498 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
13499 NeedInvert = true;
13500 [[fallthrough]];
13501 case ISD::SETO:
13503 "If SETO is expanded, SETOEQ must be legal!");
13504 CC1 = ISD::SETOEQ;
13505 CC2 = ISD::SETOEQ;
13506 Opc = ISD::AND;
13507 break;
13508 case ISD::SETONE:
13509 case ISD::SETUEQ:
13510 // If the SETUO or SETO CC isn't legal, we might be able to use
13511 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
13512 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
13513 // the operands.
13514 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13515 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
13516 isCondCodeLegal(ISD::SETOLT, OpVT))) {
13517 CC1 = ISD::SETOGT;
13518 CC2 = ISD::SETOLT;
13519 Opc = ISD::OR;
13520 NeedInvert = ((unsigned)CCCode & 0x8U);
13521 break;
13522 }
13523 [[fallthrough]];
13524 case ISD::SETOEQ:
13525 case ISD::SETOGT:
13526 case ISD::SETOGE:
13527 case ISD::SETOLT:
13528 case ISD::SETOLE:
13529 case ISD::SETUNE:
13530 case ISD::SETUGT:
13531 case ISD::SETUGE:
13532 case ISD::SETULT:
13533 case ISD::SETULE:
13534 // If we are floating point, assign and break, otherwise fall through.
13535 if (!OpVT.isInteger()) {
13536 // We can use the 4th bit to tell if we are the unordered
13537 // or ordered version of the opcode.
13538 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
13539 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
13540 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
13541 break;
13542 }
13543 // Fallthrough if we are unsigned integer.
13544 [[fallthrough]];
13545 case ISD::SETLE:
13546 case ISD::SETGT:
13547 case ISD::SETGE:
13548 case ISD::SETLT:
13549 case ISD::SETNE:
13550 case ISD::SETEQ:
13551 // If all combinations of inverting the condition and swapping operands
13552 // didn't work then we have no means to expand the condition.
13553 llvm_unreachable("Don't know how to expand this condition!");
13554 }
13555
13556 SDValue SetCC1, SetCC2;
13557 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
13558 // If we aren't the ordered or unordered operation,
13559 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
13560 if (IsNonVP) {
13561 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
13562 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
13563 } else {
13564 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
13565 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
13566 }
13567 } else {
13568 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
13569 if (IsNonVP) {
13570 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
13571 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
13572 } else {
13573 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
13574 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
13575 }
13576 }
// When a chain was supplied, merge the chain results of both compares.
13577 if (Chain)
13578 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
13579 SetCC2.getValue(1));
13580 if (IsNonVP)
13581 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
13582 else {
13583 // Transform the binary opcode to the VP equivalent.
13584 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
13585 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
13586 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
13587 }
// The combined value now lives in LHS; clear RHS and CC to signal that.
13588 RHS = SDValue();
13589 CC = SDValue();
13590 return true;
13591 }
13592 }
13593 return false;
13594}
13595
13597 SelectionDAG &DAG) const {
// Expands a vector operation by applying the same opcode to the low and high
// halves of every operand and concatenating the two results. Returns an empty
// SDValue when the type cannot be split into two equal legal halves or the
// opcode is not legal, custom or promoted on the half type.
// NOTE(review): the first line of this function's signature is missing from
// this listing -- presumably TargetLowering::expandVectorNaryOpBySplitting(
// SDNode *Node, ...); confirm against the upstream file.
13598 EVT VT = Node->getValueType(0);
13599 // Despite its documentation, GetSplitDestVTs will assert if VT cannot be
13600 // split into two equal parts.
13601 if (!VT.isVector() || !VT.getVectorElementCount().isKnownMultipleOf(2))
13602 return SDValue();
13603
13604 // Restrict expansion to cases where both parts can be concatenated.
13605 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
13606 if (LoVT != HiVT || !isTypeLegal(LoVT))
13607 return SDValue();
13608
13609 SDLoc DL(Node);
13610 unsigned Opcode = Node->getOpcode();
13611
13612 // Don't expand if the result is likely to be unrolled anyway.
13613 if (!isOperationLegalOrCustomOrPromote(Opcode, LoVT))
13614 return SDValue();
13615
// Split every operand with the same (LoVT, HiVT) pair.
13616 SmallVector<SDValue, 4> LoOps, HiOps;
13617 for (const SDValue &V : Node->op_values()) {
13618 auto [Lo, Hi] = DAG.SplitVector(V, DL, LoVT, HiVT);
13619 LoOps.push_back(Lo);
13620 HiOps.push_back(Hi);
13621 }
13622
// Both halves inherit the original node's flags.
13623 SDValue SplitOpLo = DAG.getNode(Opcode, DL, LoVT, LoOps, Node->getFlags());
13624 SDValue SplitOpHi = DAG.getNode(Opcode, DL, HiVT, HiOps, Node->getFlags());
13625 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SplitOpLo, SplitOpHi);
13626}
13627
13629 const SDLoc &DL,
13630 EVT InVecVT, SDValue EltNo,
13631 LoadSDNode *OriginalLoad,
13632 SelectionDAG &DAG) const {
// Tries to replace the extraction of element EltNo from the vector loaded by
// OriginalLoad with a scalar load of just that element. Returns the new value
// (extended, truncated or bitcast to ResultVT as needed), or an empty SDValue
// when one of the checks below rejects the transformation.
// NOTE(review): the first line of this function's signature is missing from
// this listing -- presumably TargetLowering::scalarizeExtractedVectorLoad(EVT
// ResultVT, ...); confirm against the upstream file.
13633 assert(OriginalLoad->isSimple());
13634
13635 EVT VecEltVT = InVecVT.getVectorElementType();
13636
13637 // If the vector element size is not a multiple of a byte then we are unable
13638 // to correctly compute an address to load only the extracted element as a
13639 // scalar.
13640 if (!VecEltVT.isByteSized())
13641 return SDValue();
13642
// ExtTy is only used for the shouldReduceLoadWidth query below; the extension
// kind of the load that is actually emitted is chosen later.
13643 ISD::LoadExtType ExtTy =
13644 ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
13645 if (!isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
13646 return SDValue();
13647
// NOTE(review): the declaration of MPI is on a line missing from this
// listing.
13648 std::optional<unsigned> ByteOffset;
13649 Align Alignment = OriginalLoad->getAlign();
13651 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
// Constant index: the byte offset is known, so pointer info and alignment can
// be adjusted precisely.
13652 int Elt = ConstEltNo->getZExtValue();
13653 ByteOffset = VecEltVT.getSizeInBits() * Elt / 8;
13654 MPI = OriginalLoad->getPointerInfo().getWithOffset(*ByteOffset);
13655 Alignment = commonAlignment(Alignment, *ByteOffset);
13656 } else {
13657 // Discard the pointer info except the address space because the memory
13658 // operand can't represent this new access since the offset is variable.
13659 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
13660 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
13661 }
13662
13663 if (!shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT, ByteOffset))
13664 return SDValue();
13665
// Require the narrowed access to be both allowed and reported as fast.
13666 unsigned IsFast = 0;
13667 if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
13668 OriginalLoad->getAddressSpace(), Alignment,
13669 OriginalLoad->getMemOperand()->getFlags(), &IsFast) ||
13670 !IsFast)
13671 return SDValue();
13672
13673 // The original DAG loaded the entire vector from memory, so arithmetic
13674 // within it must be inbounds.
// NOTE(review): the line that begins the NewPtr definition is missing from
// this listing; only its arguments are visible.
13676 DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo);
13677
13678 // We are replacing a vector load with a scalar load. The new load must have
13679 // identical memory op ordering to the original.
13680 SDValue Load;
13681 if (ResultVT.bitsGT(VecEltVT)) {
13682 // If the result type of vextract is wider than the load, then issue an
13683 // extending load instead.
// NOTE(review): the "true" arm of this conditional is on a line missing from
// this listing; presumably it selects ISD::ZEXTLOAD when that is legal.
13684 ISD::LoadExtType ExtType =
13685 isLoadLegal(ResultVT, VecEltVT, Alignment,
13686 OriginalLoad->getAddressSpace(), ISD::ZEXTLOAD, false)
13688 : ISD::EXTLOAD;
13689 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
13690 NewPtr, MPI, VecEltVT, Alignment,
13691 OriginalLoad->getMemOperand()->getFlags(),
13692 OriginalLoad->getAAInfo());
13693 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
13694 } else {
13695 // The result type is narrower or the same width as the vector element
13696 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
13697 Alignment, OriginalLoad->getMemOperand()->getFlags(),
13698 OriginalLoad->getAAInfo());
13699 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
// Narrower result: truncate. Same width: reinterpret the bits.
13700 if (ResultVT.bitsLT(VecEltVT))
13701 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
13702 else
13703 Load = DAG.getBitcast(ResultVT, Load);
13704 }
13705
13706 return Load;
13707}
13708
13709// Set type id for call site info and metadata 'call_target'.
13710// We are filtering for:
13711// a) The call-graph-section use case that wants to know about indirect
13712// calls, or
13713// b) We want to annotate indirect calls.
// NOTE(review): the first signature line and the remaining lines of the `if`
// condition are missing from this listing -- presumably this is
// TargetLowering::setTypeIdForCallsiteInfo and the missing condition
// implements the a)/b) filter above; confirm against the upstream file.
13715 const CallBase *CB, MachineFunction &MF,
13716 MachineFunction::CallSiteInfo &CSInfo) const {
// Only a non-null, indirect call is considered; when the full condition
// holds, CSInfo is replaced by call site info built from the call itself.
13717 if (CB && CB->isIndirectCall() &&
13720 CSInfo = MachineFunction::CallSiteInfo(*CB);
13721}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT F32
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool isSigned(unsigned Opcode)
#define _
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
#define T
#define T1
uint64_t High
#define P(N)
Function const char * Passes
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static std::pair< SDValue, SDValue > getLegalMaskAndStepVector(SDValue Mask, bool ZeroIsPoison, SDLoc DL, SelectionDAG &DAG)
Returns a type-legalized version of Mask as the first item in the pair.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static bool canNarrowCLMULToLegal(const TargetLowering &TLI, LLVMContext &Ctx, EVT VT, unsigned HalveDepth=0, unsigned TotalDepth=0)
Check if CLMUL on VT can eventually reach a type with legal CLMUL through a chain of halving decompos...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for choosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static bool canFoldStoreIntoLibCallOutputPointers(StoreSDNode *StoreNode, SDNode *FPNode)
Given a store node StoreNode, return true if it is safe to fold that node into FPNode,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are the same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static LLVM_ABI const llvm::fltSemantics & EnumToSemantics(Semantics S)
Definition APFloat.cpp:111
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static LLVM_ABI ExponentType semanticsMinExponent(const fltSemantics &)
Definition APFloat.cpp:235
static LLVM_ABI unsigned getSizeInBits(const fltSemantics &Sem)
Returns the size of the floating point number (in bits) in the given semantics.
Definition APFloat.cpp:291
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:227
static LLVM_ABI bool isIEEELikeFP(const fltSemantics &)
Definition APFloat.cpp:268
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1406
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition APFloat.h:1217
APInt bitcastToAPInt() const
Definition APFloat.h:1430
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1157
void changeSign()
Definition APFloat.h:1356
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1168
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static LLVM_ABI void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition APInt.cpp:1810
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1429
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition APInt.h:424
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
void setSignBit()
Set the sign bit to 1.
Definition APInt.h:1363
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1419
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
void negate()
Negate this APInt in place.
Definition APInt.h:1491
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned countLeadingZeros() const
Definition APInt.h:1629
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
void clearLowBits(unsigned loBits)
Set bottom loBits bits to 0.
Definition APInt.h:1458
unsigned logBase2() const
Definition APInt.h:1784
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition APInt.h:834
void setAllBits()
Set every bit to 1.
Definition APInt.h:1342
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1317
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition APInt.h:406
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition APInt.h:1390
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
void clearBits(unsigned LoBit, unsigned HiBit)
Clear the bits from LoBit (inclusive) to HiBit (exclusive) to 0.
Definition APInt.h:1440
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:483
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition APInt.h:1465
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition APInt.h:1366
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition Constants.h:872
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
This class represents a range of values.
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:217
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
std::vector< std::string > ConstraintCodeVector
Definition InlineAsm.h:104
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags getFlags() const
Return the raw flags of the source value.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MCRegister getLiveInPhysReg(Register VReg) const
getLiveInPhysReg - If VReg is a live-in virtual register, return the corresponding live-in physical r...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition Module.h:447
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator end() const
Definition ArrayRef.h:339
iterator begin() const
Definition ArrayRef.h:338
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl, SDNodeFlags Flags={})
Constant fold a setcc to true or false.
bool isKnownNeverSNaN(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
LLVM_ABI std::optional< unsigned > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
static LLVM_ABI unsigned getHasPredecessorMaxSteps()
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
LLVM_ABI bool shouldOptForSize() const
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI std::optional< unsigned > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI bool isIdentityElement(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo, unsigned Depth=0) const
Returns true if V is an identity element of Opc with Flags.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, UndefPoisonKind Kind=UndefPoisonKind::UndefOrPoison, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
LLVM_ABI bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI bool isKnownToBeAPowerOfTwo(SDValue Val, bool OrZero=false, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
iterator end() const
Definition StringRef.h:116
Class to represent struct types.
LLVM_ABI void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
unsigned getBitWidthForCttzElements(EVT RetVT, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const
Return true if it is profitable to reduce a load to a smaller type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool preferSelectsOverBooleanArithmetic(EVT VT) const
Should we prefer selects to doing arithmetic on boolean types.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0. This knowledge can be us...
EVT getLegalTypeToTransformTo(LLVMContext &Context, EVT VT) const
Perform getTypeToTransformTo repeatedly until a legal type is obtained.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall implementation.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
ISD::CondCode getSoftFloatCmpLibcallPredicate(RTLIB::LibcallImpl Call) const
Get the comparison predicate that's to be used to test the result of the comparison libcall against z...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
TargetLoweringBase(const TargetMachine &TM, const TargetSubtargetInfo &STI)
NOTE: The TargetMachine owns TLOF.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
virtual bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
virtual EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom for a comparison of the specified type...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_POISON nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
SmallVector< ConstraintPair > ConstraintGroup
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
virtual Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, const SDValue LHS, const SDValue RHS, SDValue &Lo, SDValue &Hi) const
Calculate full product of LHS and RHS either via a libcall or through brute force expansion of the mu...
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandFCANONICALIZE(SDNode *Node, SelectionDAG &DAG) const
Expand FCANONICALIZE to FMUL with 1.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_POISON nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_POISON nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
virtual bool isTargetCanonicalConstantNode(SDValue Op) const
Returns true if the given Op is considered a canonical constant for the target, which should not be ...
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue expandCttzElts(SDNode *Node, SelectionDAG &DAG) const
Expand a CTTZ_ELTS or CTTZ_ELTS_ZERO_POISON by calculating (VL - i) for each active lane (i),...
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue unwrapAddress(SDValue N) const
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, unsigned Depth) const
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_POISON nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
void forceExpandMultiply(SelectionDAG &DAG, const SDLoc &dl, bool Signed, SDValue &Lo, SDValue &Hi, SDValue LHS, SDValue RHS, SDValue HiLHS=SDValue(), SDValue HiRHS=SDValue()) const
Calculate the product twice the width of LHS and RHS.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
SDValue expandLoopDependenceMask(SDNode *N, SelectionDAG &DAG) const
Expand LOOP_DEPENDENCE_MASK nodes.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
virtual void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
~TargetLowering() override
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using an n/2-bit algorithm.
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
virtual bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const
For most targets, an LLVM type must be broken down into multiple smaller types.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return true if N is a true value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_POISON nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandCONVERT_FROM_ARBITRARY_FP(SDNode *Node, SelectionDAG &DAG) const
Expand CONVERT_FROM_ARBITRARY_FP using bit manipulation.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode, SDNodeFlags Flags={}) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index, const SDNodeFlags PtrArithFlags=SDNodeFlags()) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual void computeKnownFPClassForTargetNode(const SDValue Op, KnownFPClass &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine floating-point class information for a target node.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual void computeKnownFPClassForTargetInstr(GISelValueTracking &Analysis, Register R, KnownFPClass &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
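If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.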
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad, SelectionDAG &DAG) const
Replace an extraction of a load with a narrowed load.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
SDValue expandCTLS(SDNode *N, SelectionDAG &DAG) const
Expand CTLS (count leading sign bits) nodes.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
Primary interface to the complete machine description for the target machine.
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
unsigned EmitCallSiteInfo
The flag enables call site info production.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:785
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition Type.h:311
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:326
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:716
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
Definition APInt.cpp:3061
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition ISDOpcodes.h:538
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ CTTZ_ELTS
Returns the number of trailing (least significant) zero elements in a vector.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:778
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:792
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PARTIAL_REDUCE_FMLA
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BRIND
BRIND - Indirect branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:903
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:791
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:949
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ ABS_MIN_POISON
ABS with a poison result for INT_MIN.
Definition ISDOpcodes.h:751
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI NodeType getOppositeSignednessMinMaxOpcode(unsigned MinMaxOpc)
Given a MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns the corresponding opcode with the opposi...
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTruncation=false)
Hook for matching ConstantSDNode predicate.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Or< Preds... > m_AnyOf(const Preds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
void stable_sort(R &&Range)
Definition STLExtras.h:2115
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
InstructionCost Cost
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1569
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI FPClassTest invertFPClassTestIfSimpler(FPClassTest Test, bool UseFCmp)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
LLVM_ABI bool isOneOrOneSplatFP(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant floating-point value, or a splatted vector of a constant float...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
Definition MathExtras.h:546
void * PointerTy
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1551
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:173
LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
fltNonfiniteBehavior
Definition APFloat.h:952
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isZeroOrZeroSplat(SDValue N, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
UndefPoisonKind
Enumeration to track whether we are interested in Undef, Poison, or both.
Definition UndefPoison.h:20
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition APFloat.h:1666
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:323
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:494
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:266
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:382
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:453
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
Definition ValueTypes.h:501
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:435
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:210
bool isFixedLengthVector() const
Definition ValueTypes.h:199
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT widenIntegerElementType(LLVMContext &Context) const
Return a VT for an integer element type with doubled bit width.
Definition ValueTypes.h:467
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:187
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:331
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition KnownBits.h:315
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:190
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition KnownBits.h:269
static LLVM_ABI KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:64
void setAllConflict()
Make all bits known to be both zero and one.
Definition KnownBits.h:97
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:165
KnownBits byteSwap() const
Definition KnownBits.h:553
static LLVM_ABI std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
KnownBits reverseBits() const
Definition KnownBits.h:557
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition KnownBits.h:247
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
static LLVM_ABI KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:176
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition KnownBits.h:335
bool isSignUnknown() const
Returns true if we don't know the sign bit.
Definition KnownBits.h:67
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:325
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:184
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
static LLVM_ABI KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
static LLVM_ABI std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
static LLVM_ABI std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
static LLVM_ABI KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition KnownBits.cpp:61
static LLVM_ABI std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
static LLVM_ABI std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
static LLVM_ABI KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition KnownBits.h:171
static LLVM_ABI std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
static LLVM_ABI std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
static LLVM_ABI std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
static LLVM_ABI KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
void setNoSignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
std::string ConstraintCode
This contains the actual string for the code, like "m".
LLVM_ABI unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
LLVM_ABI bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
MakeLibCallOptions & setIsSigned(bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...
fltNonfiniteBehavior nonFiniteBehavior
Definition APFloat.h:1013
fltNanEncoding nanEncoding
Definition APFloat.h:1015