LLVM 20.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
25#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
36#include <cctype>
37using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
41 : TargetLoweringBase(tm) {}
42
/// Return the name of the target-specific SelectionDAG node with the given
/// opcode. Base implementation knows no target nodes, so it returns nullptr;
/// targets override this to name their own opcodes (used for debug dumps).
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46
49}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
54 SDValue &Chain) const {
56
57 // First, check if tail calls have been disabled in this function.
58 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59 return false;
60
61 // Conservatively require the attributes of the call to match those of
62 // the return. Ignore following attributes because they don't affect the
63 // call sequence.
64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65 for (const auto &Attr :
66 {Attribute::Alignment, Attribute::Dereferenceable,
67 Attribute::DereferenceableOrNull, Attribute::NoAlias,
68 Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
69 CallerAttrs.removeAttribute(Attr);
70
71 if (CallerAttrs.hasAttributes())
72 return false;
73
74 // It's not safe to eliminate the sign / zero extension of the return value.
75 if (CallerAttrs.contains(Attribute::ZExt) ||
76 CallerAttrs.contains(Attribute::SExt))
77 return false;
78
79 // Check if the only use is a function return node.
80 return isUsedByReturnOnly(Node, Chain);
81}
82
84 const uint32_t *CallerPreservedMask,
85 const SmallVectorImpl<CCValAssign> &ArgLocs,
86 const SmallVectorImpl<SDValue> &OutVals) const {
87 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
88 const CCValAssign &ArgLoc = ArgLocs[I];
89 if (!ArgLoc.isRegLoc())
90 continue;
91 MCRegister Reg = ArgLoc.getLocReg();
92 // Only look at callee saved registers.
93 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
94 continue;
95 // Check that we pass the value used for the caller.
96 // (We look for a CopyFromReg reading a virtual register that is used
97 // for the function live-in value of register Reg)
98 SDValue Value = OutVals[I];
99 if (Value->getOpcode() == ISD::AssertZext)
100 Value = Value.getOperand(0);
101 if (Value->getOpcode() != ISD::CopyFromReg)
102 return false;
103 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
104 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
105 return false;
106 }
107 return true;
108}
109
110/// Set CallLoweringInfo attribute flags based on a call instruction
111/// and called function attributes.
113 unsigned ArgIdx) {
114 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
115 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
116 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
117 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
118 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
119 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
120 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
121 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
122 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
123 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
124 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
125 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
126 Alignment = Call->getParamStackAlign(ArgIdx);
127 IndirectType = nullptr;
129 "multiple ABI attributes?");
130 if (IsByVal) {
131 IndirectType = Call->getParamByValType(ArgIdx);
132 if (!Alignment)
133 Alignment = Call->getParamAlign(ArgIdx);
134 }
135 if (IsPreallocated)
136 IndirectType = Call->getParamPreallocatedType(ArgIdx);
137 if (IsInAlloca)
138 IndirectType = Call->getParamInAllocaType(ArgIdx);
139 if (IsSRet)
140 IndirectType = Call->getParamStructRetType(ArgIdx);
141}
142
143/// Generate a libcall taking the given operands as arguments and returning a
144/// result of type RetVT.
145std::pair<SDValue, SDValue>
148 MakeLibCallOptions CallOptions,
149 const SDLoc &dl,
150 SDValue InChain) const {
151 if (!InChain)
152 InChain = DAG.getEntryNode();
153
155 Args.reserve(Ops.size());
156
158 for (unsigned i = 0; i < Ops.size(); ++i) {
159 SDValue NewOp = Ops[i];
160 Entry.Node = NewOp;
161 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
162 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
163 CallOptions.IsSExt);
164 Entry.IsZExt = !Entry.IsSExt;
165
166 if (CallOptions.IsSoften &&
168 Entry.IsSExt = Entry.IsZExt = false;
169 }
170 Args.push_back(Entry);
171 }
172
173 if (LC == RTLIB::UNKNOWN_LIBCALL)
174 report_fatal_error("Unsupported library call operation!");
177
178 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
180 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
181 bool zeroExtend = !signExtend;
182
183 if (CallOptions.IsSoften &&
185 signExtend = zeroExtend = false;
186 }
187
188 CLI.setDebugLoc(dl)
189 .setChain(InChain)
190 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
191 .setNoReturn(CallOptions.DoesNotReturn)
194 .setSExtResult(signExtend)
195 .setZExtResult(zeroExtend);
196 return LowerCallTo(CLI);
197}
198
200 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
201 unsigned SrcAS, const AttributeList &FuncAttributes) const {
202 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
203 Op.getSrcAlign() < Op.getDstAlign())
204 return false;
205
206 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
207
208 if (VT == MVT::Other) {
209 // Use the largest integer type whose alignment constraints are satisfied.
210 // We only need to check DstAlign here as SrcAlign is always greater or
211 // equal to DstAlign (or zero).
212 VT = MVT::LAST_INTEGER_VALUETYPE;
213 if (Op.isFixedDstAlign())
214 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
215 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
217 assert(VT.isInteger());
218
219 // Find the largest legal integer type.
220 MVT LVT = MVT::LAST_INTEGER_VALUETYPE;
221 while (!isTypeLegal(LVT))
222 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
223 assert(LVT.isInteger());
224
225 // If the type we've chosen is larger than the largest legal integer type
226 // then use that instead.
227 if (VT.bitsGT(LVT))
228 VT = LVT;
229 }
230
231 unsigned NumMemOps = 0;
232 uint64_t Size = Op.size();
233 while (Size) {
234 unsigned VTSize = VT.getSizeInBits() / 8;
235 while (VTSize > Size) {
236 // For now, only use non-vector load / store's for the left-over pieces.
237 EVT NewVT = VT;
238 unsigned NewVTSize;
239
240 bool Found = false;
241 if (VT.isVector() || VT.isFloatingPoint()) {
242 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
245 Found = true;
246 else if (NewVT == MVT::i64 &&
248 isSafeMemOpType(MVT::f64)) {
249 // i64 is usually not legal on 32-bit targets, but f64 may be.
250 NewVT = MVT::f64;
251 Found = true;
252 }
253 }
254
255 if (!Found) {
256 do {
257 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
258 if (NewVT == MVT::i8)
259 break;
260 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
261 }
262 NewVTSize = NewVT.getSizeInBits() / 8;
263
264 // If the new VT cannot cover all of the remaining bits, then consider
265 // issuing a (or a pair of) unaligned and overlapping load / store.
266 unsigned Fast;
267 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
269 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
271 Fast)
272 VTSize = Size;
273 else {
274 VT = NewVT;
275 VTSize = NewVTSize;
276 }
277 }
278
279 if (++NumMemOps > Limit)
280 return false;
281
282 MemOps.push_back(VT);
283 Size -= VTSize;
284 }
285
286 return true;
287}
288
289/// Soften the operands of a comparison. This code is shared among BR_CC,
290/// SELECT_CC, and SETCC handlers.
292 SDValue &NewLHS, SDValue &NewRHS,
293 ISD::CondCode &CCCode,
294 const SDLoc &dl, const SDValue OldLHS,
295 const SDValue OldRHS) const {
296 SDValue Chain;
297 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
298 OldRHS, Chain);
299}
300
302 SDValue &NewLHS, SDValue &NewRHS,
303 ISD::CondCode &CCCode,
304 const SDLoc &dl, const SDValue OldLHS,
305 const SDValue OldRHS,
306 SDValue &Chain,
307 bool IsSignaling) const {
308 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
309 // not supporting it. We can update this code when libgcc provides such
310 // functions.
311
312 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
313 && "Unsupported setcc type!");
314
315 // Expand into one or more soft-fp libcall(s).
316 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
317 bool ShouldInvertCC = false;
318 switch (CCCode) {
319 case ISD::SETEQ:
320 case ISD::SETOEQ:
321 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
322 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
323 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
324 break;
325 case ISD::SETNE:
326 case ISD::SETUNE:
327 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
328 (VT == MVT::f64) ? RTLIB::UNE_F64 :
329 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
330 break;
331 case ISD::SETGE:
332 case ISD::SETOGE:
333 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
334 (VT == MVT::f64) ? RTLIB::OGE_F64 :
335 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
336 break;
337 case ISD::SETLT:
338 case ISD::SETOLT:
339 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
340 (VT == MVT::f64) ? RTLIB::OLT_F64 :
341 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
342 break;
343 case ISD::SETLE:
344 case ISD::SETOLE:
345 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
346 (VT == MVT::f64) ? RTLIB::OLE_F64 :
347 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
348 break;
349 case ISD::SETGT:
350 case ISD::SETOGT:
351 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
352 (VT == MVT::f64) ? RTLIB::OGT_F64 :
353 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
354 break;
355 case ISD::SETO:
356 ShouldInvertCC = true;
357 [[fallthrough]];
358 case ISD::SETUO:
359 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
360 (VT == MVT::f64) ? RTLIB::UO_F64 :
361 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
362 break;
363 case ISD::SETONE:
364 // SETONE = O && UNE
365 ShouldInvertCC = true;
366 [[fallthrough]];
367 case ISD::SETUEQ:
368 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
369 (VT == MVT::f64) ? RTLIB::UO_F64 :
370 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
371 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
372 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
373 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
374 break;
375 default:
376 // Invert CC for unordered comparisons
377 ShouldInvertCC = true;
378 switch (CCCode) {
379 case ISD::SETULT:
380 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
381 (VT == MVT::f64) ? RTLIB::OGE_F64 :
382 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
383 break;
384 case ISD::SETULE:
385 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
386 (VT == MVT::f64) ? RTLIB::OGT_F64 :
387 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
388 break;
389 case ISD::SETUGT:
390 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
391 (VT == MVT::f64) ? RTLIB::OLE_F64 :
392 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
393 break;
394 case ISD::SETUGE:
395 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
396 (VT == MVT::f64) ? RTLIB::OLT_F64 :
397 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
398 break;
399 default: llvm_unreachable("Do not know how to soften this setcc!");
400 }
401 }
402
403 // Use the target specific return value for comparison lib calls.
405 SDValue Ops[2] = {NewLHS, NewRHS};
407 EVT OpsVT[2] = { OldLHS.getValueType(),
408 OldRHS.getValueType() };
409 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
410 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
411 NewLHS = Call.first;
412 NewRHS = DAG.getConstant(0, dl, RetVT);
413
414 CCCode = getCmpLibcallCC(LC1);
415 if (ShouldInvertCC) {
416 assert(RetVT.isInteger());
417 CCCode = getSetCCInverse(CCCode, RetVT);
418 }
419
420 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
421 // Update Chain.
422 Chain = Call.second;
423 } else {
424 EVT SetCCVT =
425 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
426 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
427 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
428 CCCode = getCmpLibcallCC(LC2);
429 if (ShouldInvertCC)
430 CCCode = getSetCCInverse(CCCode, RetVT);
431 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
432 if (Chain)
433 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
434 Call2.second);
435 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
436 Tmp.getValueType(), Tmp, NewLHS);
437 NewRHS = SDValue();
438 }
439}
440
441/// Return the entry encoding for a jump table in the current function. The
442/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
444 // In non-pic modes, just use the address of a block.
445 if (!isPositionIndependent())
447
448 // In PIC mode, if the target supports a GPRel32 directive, use it.
449 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
451
452 // Otherwise, use a label difference.
454}
455
457 SelectionDAG &DAG) const {
458 // If our PIC model is GP relative, use the global offset table as the base.
459 unsigned JTEncoding = getJumpTableEncoding();
460
464
465 return Table;
466}
467
468/// This returns the relocation base for the given PIC jumptable, the same as
469/// getPICJumpTableRelocBase, but as an MCExpr.
470const MCExpr *
472 unsigned JTI,MCContext &Ctx) const{
473 // The normal PIC reloc base is the label at the start of the jump table.
474 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
475}
476
478 SDValue Addr, int JTI,
479 SelectionDAG &DAG) const {
480 SDValue Chain = Value;
481 // Jump table debug info is only needed if CodeView is enabled.
483 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
484 }
485 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
486}
487
488bool
490 const TargetMachine &TM = getTargetMachine();
491 const GlobalValue *GV = GA->getGlobal();
492
493 // If the address is not even local to this DSO we will have to load it from
494 // a got and then add the offset.
495 if (!TM.shouldAssumeDSOLocal(GV))
496 return false;
497
498 // If the code is position independent we will have to add a base register.
499 if (isPositionIndependent())
500 return false;
501
502 // Otherwise we can do it.
503 return true;
504}
505
506//===----------------------------------------------------------------------===//
507// Optimization Methods
508//===----------------------------------------------------------------------===//
509
510/// If the specified instruction has a constant integer operand and there are
511/// bits set in that constant that are not demanded, then clear those bits and
512/// return true.
514 const APInt &DemandedBits,
515 const APInt &DemandedElts,
516 TargetLoweringOpt &TLO) const {
517 SDLoc DL(Op);
518 unsigned Opcode = Op.getOpcode();
519
520 // Early-out if we've ended up calling an undemanded node, leave this to
521 // constant folding.
522 if (DemandedBits.isZero() || DemandedElts.isZero())
523 return false;
524
525 // Do target-specific constant optimization.
526 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
527 return TLO.New.getNode();
528
529 // FIXME: ISD::SELECT, ISD::SELECT_CC
530 switch (Opcode) {
531 default:
532 break;
533 case ISD::XOR:
534 case ISD::AND:
535 case ISD::OR: {
536 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
537 if (!Op1C || Op1C->isOpaque())
538 return false;
539
540 // If this is a 'not' op, don't touch it because that's a canonical form.
541 const APInt &C = Op1C->getAPIntValue();
542 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
543 return false;
544
545 if (!C.isSubsetOf(DemandedBits)) {
546 EVT VT = Op.getValueType();
547 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
548 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
549 Op->getFlags());
550 return TLO.CombineTo(Op, NewOp);
551 }
552
553 break;
554 }
555 }
556
557 return false;
558}
559
561 const APInt &DemandedBits,
562 TargetLoweringOpt &TLO) const {
563 EVT VT = Op.getValueType();
564 APInt DemandedElts = VT.isVector()
566 : APInt(1, 1);
567 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
568}
569
570/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
571/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
572/// but it could be generalized for targets with other types of implicit
573/// widening casts.
575 const APInt &DemandedBits,
576 TargetLoweringOpt &TLO) const {
577 assert(Op.getNumOperands() == 2 &&
578 "ShrinkDemandedOp only supports binary operators!");
579 assert(Op.getNode()->getNumValues() == 1 &&
580 "ShrinkDemandedOp only supports nodes with one result!");
581
582 EVT VT = Op.getValueType();
583 SelectionDAG &DAG = TLO.DAG;
584 SDLoc dl(Op);
585
586 // Early return, as this function cannot handle vector types.
587 if (VT.isVector())
588 return false;
589
590 assert(Op.getOperand(0).getValueType().getScalarSizeInBits() == BitWidth &&
591 Op.getOperand(1).getValueType().getScalarSizeInBits() == BitWidth &&
592 "ShrinkDemandedOp only supports operands that have the same size!");
593
594 // Don't do this if the node has another user, which may require the
595 // full value.
596 if (!Op.getNode()->hasOneUse())
597 return false;
598
599 // Search for the smallest integer type with free casts to and from
600 // Op's type. For expedience, just check power-of-2 integer types.
601 unsigned DemandedSize = DemandedBits.getActiveBits();
602 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
603 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
604 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
605 if (isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT)) {
606 // We found a type with free casts.
607 SDValue X = DAG.getNode(
608 Op.getOpcode(), dl, SmallVT,
609 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
610 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
611 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
612 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
613 return TLO.CombineTo(Op, Z);
614 }
615 }
616 return false;
617}
618
620 DAGCombinerInfo &DCI) const {
621 SelectionDAG &DAG = DCI.DAG;
622 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
623 !DCI.isBeforeLegalizeOps());
624 KnownBits Known;
625
626 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
627 if (Simplified) {
628 DCI.AddToWorklist(Op.getNode());
630 }
631 return Simplified;
632}
633
635 const APInt &DemandedElts,
636 DAGCombinerInfo &DCI) const {
637 SelectionDAG &DAG = DCI.DAG;
638 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
639 !DCI.isBeforeLegalizeOps());
640 KnownBits Known;
641
642 bool Simplified =
643 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
644 if (Simplified) {
645 DCI.AddToWorklist(Op.getNode());
647 }
648 return Simplified;
649}
650
652 KnownBits &Known,
654 unsigned Depth,
655 bool AssumeSingleUse) const {
656 EVT VT = Op.getValueType();
657
658 // Since the number of lanes in a scalable vector is unknown at compile time,
659 // we track one bit which is implicitly broadcast to all lanes. This means
660 // that all lanes in a scalable vector are considered demanded.
661 APInt DemandedElts = VT.isFixedLengthVector()
663 : APInt(1, 1);
664 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
665 AssumeSingleUse);
666}
667
668// TODO: Under what circumstances can we create nodes? Constant folding?
670 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
671 SelectionDAG &DAG, unsigned Depth) const {
672 EVT VT = Op.getValueType();
673
674 // Limit search depth.
676 return SDValue();
677
678 // Ignore UNDEFs.
679 if (Op.isUndef())
680 return SDValue();
681
682 // Not demanding any bits/elts from Op.
683 if (DemandedBits == 0 || DemandedElts == 0)
684 return DAG.getUNDEF(VT);
685
686 bool IsLE = DAG.getDataLayout().isLittleEndian();
687 unsigned NumElts = DemandedElts.getBitWidth();
688 unsigned BitWidth = DemandedBits.getBitWidth();
689 KnownBits LHSKnown, RHSKnown;
690 switch (Op.getOpcode()) {
691 case ISD::BITCAST: {
692 if (VT.isScalableVector())
693 return SDValue();
694
695 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
696 EVT SrcVT = Src.getValueType();
697 EVT DstVT = Op.getValueType();
698 if (SrcVT == DstVT)
699 return Src;
700
701 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
702 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
703 if (NumSrcEltBits == NumDstEltBits)
704 if (SDValue V = SimplifyMultipleUseDemandedBits(
705 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
706 return DAG.getBitcast(DstVT, V);
707
708 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
709 unsigned Scale = NumDstEltBits / NumSrcEltBits;
710 unsigned NumSrcElts = SrcVT.getVectorNumElements();
711 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
712 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
713 for (unsigned i = 0; i != Scale; ++i) {
714 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
715 unsigned BitOffset = EltOffset * NumSrcEltBits;
716 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
717 if (!Sub.isZero()) {
718 DemandedSrcBits |= Sub;
719 for (unsigned j = 0; j != NumElts; ++j)
720 if (DemandedElts[j])
721 DemandedSrcElts.setBit((j * Scale) + i);
722 }
723 }
724
725 if (SDValue V = SimplifyMultipleUseDemandedBits(
726 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
727 return DAG.getBitcast(DstVT, V);
728 }
729
730 // TODO - bigendian once we have test coverage.
731 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
732 unsigned Scale = NumSrcEltBits / NumDstEltBits;
733 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
734 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
735 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
736 for (unsigned i = 0; i != NumElts; ++i)
737 if (DemandedElts[i]) {
738 unsigned Offset = (i % Scale) * NumDstEltBits;
739 DemandedSrcBits.insertBits(DemandedBits, Offset);
740 DemandedSrcElts.setBit(i / Scale);
741 }
742
743 if (SDValue V = SimplifyMultipleUseDemandedBits(
744 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
745 return DAG.getBitcast(DstVT, V);
746 }
747
748 break;
749 }
750 case ISD::FREEZE: {
751 SDValue N0 = Op.getOperand(0);
752 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
753 /*PoisonOnly=*/false))
754 return N0;
755 break;
756 }
757 case ISD::AND: {
758 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
759 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
760
761 // If all of the demanded bits are known 1 on one side, return the other.
762 // These bits cannot contribute to the result of the 'and' in this
763 // context.
764 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
765 return Op.getOperand(0);
766 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
767 return Op.getOperand(1);
768 break;
769 }
770 case ISD::OR: {
771 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
772 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
773
774 // If all of the demanded bits are known zero on one side, return the
775 // other. These bits cannot contribute to the result of the 'or' in this
776 // context.
777 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
778 return Op.getOperand(0);
779 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
780 return Op.getOperand(1);
781 break;
782 }
783 case ISD::XOR: {
784 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
785 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
786
787 // If all of the demanded bits are known zero on one side, return the
788 // other.
789 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
790 return Op.getOperand(0);
791 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
792 return Op.getOperand(1);
793 break;
794 }
795 case ISD::SHL: {
796 // If we are only demanding sign bits then we can use the shift source
797 // directly.
798 if (std::optional<uint64_t> MaxSA =
799 DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
800 SDValue Op0 = Op.getOperand(0);
801 unsigned ShAmt = *MaxSA;
802 unsigned NumSignBits =
803 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
804 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
805 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
806 return Op0;
807 }
808 break;
809 }
810 case ISD::SETCC: {
811 SDValue Op0 = Op.getOperand(0);
812 SDValue Op1 = Op.getOperand(1);
813 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
814 // If (1) we only need the sign-bit, (2) the setcc operands are the same
815 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
816 // -1, we may be able to bypass the setcc.
817 if (DemandedBits.isSignMask() &&
821 // If we're testing X < 0, then this compare isn't needed - just use X!
822 // FIXME: We're limiting to integer types here, but this should also work
823 // if we don't care about FP signed-zero. The use of SETLT with FP means
824 // that we don't care about NaNs.
825 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
827 return Op0;
828 }
829 break;
830 }
832 // If none of the extended bits are demanded, eliminate the sextinreg.
833 SDValue Op0 = Op.getOperand(0);
834 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
835 unsigned ExBits = ExVT.getScalarSizeInBits();
836 if (DemandedBits.getActiveBits() <= ExBits &&
838 return Op0;
839 // If the input is already sign extended, just drop the extension.
840 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
841 if (NumSignBits >= (BitWidth - ExBits + 1))
842 return Op0;
843 break;
844 }
848 if (VT.isScalableVector())
849 return SDValue();
850
851 // If we only want the lowest element and none of extended bits, then we can
852 // return the bitcasted source vector.
853 SDValue Src = Op.getOperand(0);
854 EVT SrcVT = Src.getValueType();
855 EVT DstVT = Op.getValueType();
856 if (IsLE && DemandedElts == 1 &&
857 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
858 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
859 return DAG.getBitcast(DstVT, Src);
860 }
861 break;
862 }
864 if (VT.isScalableVector())
865 return SDValue();
866
867 // If we don't demand the inserted element, return the base vector.
868 SDValue Vec = Op.getOperand(0);
869 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
870 EVT VecVT = Vec.getValueType();
871 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
872 !DemandedElts[CIdx->getZExtValue()])
873 return Vec;
874 break;
875 }
877 if (VT.isScalableVector())
878 return SDValue();
879
880 SDValue Vec = Op.getOperand(0);
881 SDValue Sub = Op.getOperand(1);
882 uint64_t Idx = Op.getConstantOperandVal(2);
883 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
884 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
885 // If we don't demand the inserted subvector, return the base vector.
886 if (DemandedSubElts == 0)
887 return Vec;
888 break;
889 }
890 case ISD::VECTOR_SHUFFLE: {
892 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
893
894 // If all the demanded elts are from one operand and are inline,
895 // then we can use the operand directly.
896 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
897 for (unsigned i = 0; i != NumElts; ++i) {
898 int M = ShuffleMask[i];
899 if (M < 0 || !DemandedElts[i])
900 continue;
901 AllUndef = false;
902 IdentityLHS &= (M == (int)i);
903 IdentityRHS &= ((M - NumElts) == i);
904 }
905
906 if (AllUndef)
907 return DAG.getUNDEF(Op.getValueType());
908 if (IdentityLHS)
909 return Op.getOperand(0);
910 if (IdentityRHS)
911 return Op.getOperand(1);
912 break;
913 }
914 default:
915 // TODO: Probably okay to remove after audit; here to reduce change size
916 // in initial enablement patch for scalable vectors
917 if (VT.isScalableVector())
918 return SDValue();
919
920 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
921 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
922 Op, DemandedBits, DemandedElts, DAG, Depth))
923 return V;
924 break;
925 }
926 return SDValue();
927}
928
931 unsigned Depth) const {
932 EVT VT = Op.getValueType();
933 // Since the number of lanes in a scalable vector is unknown at compile time,
934 // we track one bit which is implicitly broadcast to all lanes. This means
935 // that all lanes in a scalable vector are considered demanded.
936 APInt DemandedElts = VT.isFixedLengthVector()
938 : APInt(1, 1);
939 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
940 Depth);
941}
942
944 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
945 unsigned Depth) const {
946 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
947 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
948 Depth);
949}
950
// Fold a right-shift-by-one of an (extended) add into an averaging node:
//   sra/srl (add (ext A), (ext B)), 1        --> ext (avgfloor A, B)
//   sra/srl (add (add (ext A), (ext B)), 1), 1 --> ext (avgceil A, B)
// Returns the replacement value, or an empty SDValue if no fold applies.
951// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
952// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
// NOTE(review): this scraped copy is missing original lines 953-954, which
// held the opening of this helper's signature (presumably
// "static SDValue combineShiftToAVG(SDValue Op, TargetLoweringOpt &TLO,") —
// confirm against the upstream TargetLowering.cpp.
 955 const TargetLowering &TLI,
 956 const APInt &DemandedBits,
 957 const APInt &DemandedElts, unsigned Depth) {
 958 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
 959 "SRL or SRA node is required here!");
 960 // Is the right shift using an immediate value of 1?
 961 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
 962 if (!N1C || !N1C->isOne())
 963 return SDValue();
 964
 965 // We are looking for an avgfloor
 966 // add(ext, ext)
 967 // or one of these as a avgceil
 968 // add(add(ext, ext), 1)
 969 // add(add(ext, 1), ext)
 970 // add(ext, add(ext, 1))
 971 SDValue Add = Op.getOperand(0);
 972 if (Add.getOpcode() != ISD::ADD)
 973 return SDValue();
 974
 975 SDValue ExtOpA = Add.getOperand(0);
 976 SDValue ExtOpB = Add.getOperand(1);
 977 SDValue Add2;
// Helper for the avgceil patterns: given an inner add 'A' whose operands are
// (Op1, Op2) and the sibling operand 'Op3' of the outer add, succeed when
// either Op2 or Op3 is the constant/splat 1. On success it records the two
// averaged operands in ExtOpA/ExtOpB and the inner add in Add2 (used later
// for the overflow check); captures by reference so it mutates the locals
// above.
 978 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
 979 ConstantSDNode *ConstOp;
 980 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
 981 ConstOp->isOne()) {
 982 ExtOpA = Op1;
 983 ExtOpB = Op3;
 984 Add2 = A;
 985 return true;
 986 }
 987 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
 988 ConstOp->isOne()) {
 989 ExtOpA = Op1;
 990 ExtOpB = Op2;
 991 Add2 = A;
 992 return true;
 993 }
 994 return false;
 995 };
// Try both commutations of the outer add: whichever operand is itself an ADD
// may carry the "+1" that turns a floor-average into a ceil-average. Relies
// on short-circuit evaluation so MatchOperands only runs (and only mutates
// ExtOpA/ExtOpB/Add2) when the corresponding operand really is an ADD.
 996 bool IsCeil =
 997 (ExtOpA.getOpcode() == ISD::ADD &&
 998 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
 999 (ExtOpB.getOpcode() == ISD::ADD &&
 1000 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
 1001
 1002 // If the shift is signed (sra):
 1003 // - Needs >= 2 sign bit for both operands.
 1004 // - Needs >= 2 zero bits.
 1005 // If the shift is unsigned (srl):
 1006 // - Needs >= 1 zero bit for both operands.
 1007 // - Needs 1 demanded bit zero and >= 2 sign bits.
 1008 SelectionDAG &DAG = TLO.DAG;
 1009 unsigned ShiftOpc = Op.getOpcode();
 1010 bool IsSigned = false;
// KnownBits here is a local count of known sign/zero bits (shadows the
// llvm::KnownBits type name), later used to size the narrowed type.
 1011 unsigned KnownBits;
 1012 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
 1013 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
// The -1 presumably reserves one bit for the add's carry so the sum still
// fits — TODO confirm against the requirements listed above.
 1014 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
 1015 unsigned NumZeroA =
 1016 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
 1017 unsigned NumZeroB =
 1018 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
 1019 unsigned NumZero = std::min(NumZeroA, NumZeroB);
 1020
// Decide between the signed and unsigned average forms, preferring the one
// backed by more known bits; bail out if neither set of requirements holds.
 1021 switch (ShiftOpc) {
 1022 default:
 1023 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
 1024 case ISD::SRA: {
 1025 if (NumZero >= 2 && NumSigned < NumZero) {
 1026 IsSigned = false;
 1027 KnownBits = NumZero;
 1028 break;
 1029 }
 1030 if (NumSigned >= 1) {
 1031 IsSigned = true;
 1032 KnownBits = NumSigned;
 1033 break;
 1034 }
 1035 return SDValue();
 1036 }
 1037 case ISD::SRL: {
 1038 if (NumZero >= 1 && NumSigned < NumZero) {
 1039 IsSigned = false;
 1040 KnownBits = NumZero;
 1041 break;
 1042 }
 1043 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
 1044 IsSigned = true;
 1045 KnownBits = NumSigned;
 1046 break;
 1047 }
 1048 return SDValue();
 1049 }
 1050 }
 1051
 1052 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
 1053 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
 1054
 1055 // Find the smallest power-2 type that is legal for this vector size and
 1056 // operation, given the original type size and the number of known sign/zero
 1057 // bits.
 1058 EVT VT = Op.getValueType();
 1059 unsigned MinWidth =
 1060 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
 1061 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
// NOTE(review): original line 1062 is missing from this scraped copy — the
// bare "return SDValue();" below has lost its guard condition (upstream
// appears to compare NVT's scalar width against VT's). Confirm against the
// upstream TargetLowering.cpp before editing.
 1063 return SDValue();
 1064 if (VT.isVector())
 1065 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
// If the narrowed type/op is not legal, fall back to doing the average in
// the original width — but only when the add(s) provably do not overflow,
// since avg nodes compute the full-width sum implicitly.
 1066 if (TLO.LegalTypes() && !TLI.isOperationLegal(AVGOpc, NVT)) {
 1067 // If we could not transform, and (both) adds are nuw/nsw, we can use the
 1068 // larger type size to do the transform.
 1069 if (TLO.LegalOperations() && !TLI.isOperationLegal(AVGOpc, VT))
 1070 return SDValue();
 1071 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
 1072 Add.getOperand(1)) &&
 1073 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
 1074 Add2.getOperand(1))))
 1075 NVT = VT;
 1076 else
 1077 return SDValue();
 1078 }
 1079
 1080 // Don't create a AVGFLOOR node with a scalar constant unless its legal as
 1081 // this is likely to stop other folds (reassociation, value tracking etc.)
 1082 if (!IsCeil && !TLI.isOperationLegal(AVGOpc, NVT) &&
 1083 (isa<ConstantSDNode>(ExtOpA) || isa<ConstantSDNode>(ExtOpB)))
 1084 return SDValue();
 1085
// Build the average in the (possibly narrowed) type and extend/truncate the
// result back to the original type, using sext/zext to match IsSigned.
 1086 SDLoc DL(Op);
 1087 SDValue ResultAVG =
 1088 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
 1089 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
 1090 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
 1091}
1092
1093/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1094/// result of Op are ever used downstream. If we can use this information to
1095/// simplify Op, create a new simplified DAG node and return true, returning the
1096/// original and new nodes in Old and New. Otherwise, analyze the expression and
1097/// return a mask of Known bits for the expression (used to simplify the
1098/// caller). The Known bits may only be accurate for those bits in the
1099/// OriginalDemandedBits and OriginalDemandedElts.
1101 SDValue Op, const APInt &OriginalDemandedBits,
1102 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1103 unsigned Depth, bool AssumeSingleUse) const {
1104 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1105 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1106 "Mask size mismatches value type size!");
1107
1108 // Don't know anything.
1109 Known = KnownBits(BitWidth);
1110
1111 EVT VT = Op.getValueType();
1112 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1113 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1114 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1115 "Unexpected vector size");
1116
1117 APInt DemandedBits = OriginalDemandedBits;
1118 APInt DemandedElts = OriginalDemandedElts;
1119 SDLoc dl(Op);
1120
1121 // Undef operand.
1122 if (Op.isUndef())
1123 return false;
1124
1125 // We can't simplify target constants.
1126 if (Op.getOpcode() == ISD::TargetConstant)
1127 return false;
1128
1129 if (Op.getOpcode() == ISD::Constant) {
1130 // We know all of the bits for a constant!
1131 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1132 return false;
1133 }
1134
1135 if (Op.getOpcode() == ISD::ConstantFP) {
1136 // We know all of the bits for a floating point constant!
1138 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1139 return false;
1140 }
1141
1142 // Other users may use these bits.
1143 bool HasMultiUse = false;
1144 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1146 // Limit search depth.
1147 return false;
1148 }
1149 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1151 DemandedElts = APInt::getAllOnes(NumElts);
1152 HasMultiUse = true;
1153 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1154 // Not demanding any bits/elts from Op.
1155 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1156 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1157 // Limit search depth.
1158 return false;
1159 }
1160
1161 KnownBits Known2;
1162 switch (Op.getOpcode()) {
1163 case ISD::SCALAR_TO_VECTOR: {
1164 if (VT.isScalableVector())
1165 return false;
1166 if (!DemandedElts[0])
1167 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1168
1169 KnownBits SrcKnown;
1170 SDValue Src = Op.getOperand(0);
1171 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1172 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1173 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1174 return true;
1175
1176 // Upper elements are undef, so only get the knownbits if we just demand
1177 // the bottom element.
1178 if (DemandedElts == 1)
1179 Known = SrcKnown.anyextOrTrunc(BitWidth);
1180 break;
1181 }
1182 case ISD::BUILD_VECTOR:
1183 // Collect the known bits that are shared by every demanded element.
1184 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1185 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1186 return false; // Don't fall through, will infinitely loop.
1187 case ISD::SPLAT_VECTOR: {
1188 SDValue Scl = Op.getOperand(0);
1189 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1190 KnownBits KnownScl;
1191 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1192 return true;
1193
1194 // Implicitly truncate the bits to match the official semantics of
1195 // SPLAT_VECTOR.
1196 Known = KnownScl.trunc(BitWidth);
1197 break;
1198 }
1199 case ISD::LOAD: {
1200 auto *LD = cast<LoadSDNode>(Op);
1201 if (getTargetConstantFromLoad(LD)) {
1202 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1203 return false; // Don't fall through, will infinitely loop.
1204 }
1205 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1206 // If this is a ZEXTLoad and we are looking at the loaded value.
1207 EVT MemVT = LD->getMemoryVT();
1208 unsigned MemBits = MemVT.getScalarSizeInBits();
1209 Known.Zero.setBitsFrom(MemBits);
1210 return false; // Don't fall through, will infinitely loop.
1211 }
1212 break;
1213 }
1215 if (VT.isScalableVector())
1216 return false;
1217 SDValue Vec = Op.getOperand(0);
1218 SDValue Scl = Op.getOperand(1);
1219 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1220 EVT VecVT = Vec.getValueType();
1221
1222 // If index isn't constant, assume we need all vector elements AND the
1223 // inserted element.
1224 APInt DemandedVecElts(DemandedElts);
1225 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1226 unsigned Idx = CIdx->getZExtValue();
1227 DemandedVecElts.clearBit(Idx);
1228
1229 // Inserted element is not required.
1230 if (!DemandedElts[Idx])
1231 return TLO.CombineTo(Op, Vec);
1232 }
1233
1234 KnownBits KnownScl;
1235 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1236 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1237 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1238 return true;
1239
1240 Known = KnownScl.anyextOrTrunc(BitWidth);
1241
1242 KnownBits KnownVec;
1243 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1244 Depth + 1))
1245 return true;
1246
1247 if (!!DemandedVecElts)
1248 Known = Known.intersectWith(KnownVec);
1249
1250 return false;
1251 }
1252 case ISD::INSERT_SUBVECTOR: {
1253 if (VT.isScalableVector())
1254 return false;
1255 // Demand any elements from the subvector and the remainder from the src its
1256 // inserted into.
1257 SDValue Src = Op.getOperand(0);
1258 SDValue Sub = Op.getOperand(1);
1259 uint64_t Idx = Op.getConstantOperandVal(2);
1260 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1261 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1262 APInt DemandedSrcElts = DemandedElts;
1263 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1264
1265 KnownBits KnownSub, KnownSrc;
1266 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1267 Depth + 1))
1268 return true;
1269 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1270 Depth + 1))
1271 return true;
1272
1273 Known.Zero.setAllBits();
1274 Known.One.setAllBits();
1275 if (!!DemandedSubElts)
1276 Known = Known.intersectWith(KnownSub);
1277 if (!!DemandedSrcElts)
1278 Known = Known.intersectWith(KnownSrc);
1279
1280 // Attempt to avoid multi-use src if we don't need anything from it.
1281 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1282 !DemandedSrcElts.isAllOnes()) {
1283 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1284 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1285 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1286 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1287 if (NewSub || NewSrc) {
1288 NewSub = NewSub ? NewSub : Sub;
1289 NewSrc = NewSrc ? NewSrc : Src;
1290 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1291 Op.getOperand(2));
1292 return TLO.CombineTo(Op, NewOp);
1293 }
1294 }
1295 break;
1296 }
1298 if (VT.isScalableVector())
1299 return false;
1300 // Offset the demanded elts by the subvector index.
1301 SDValue Src = Op.getOperand(0);
1302 if (Src.getValueType().isScalableVector())
1303 break;
1304 uint64_t Idx = Op.getConstantOperandVal(1);
1305 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1306 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1307
1308 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1309 Depth + 1))
1310 return true;
1311
1312 // Attempt to avoid multi-use src if we don't need anything from it.
1313 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1314 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1315 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1316 if (DemandedSrc) {
1317 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1318 Op.getOperand(1));
1319 return TLO.CombineTo(Op, NewOp);
1320 }
1321 }
1322 break;
1323 }
1324 case ISD::CONCAT_VECTORS: {
1325 if (VT.isScalableVector())
1326 return false;
1327 Known.Zero.setAllBits();
1328 Known.One.setAllBits();
1329 EVT SubVT = Op.getOperand(0).getValueType();
1330 unsigned NumSubVecs = Op.getNumOperands();
1331 unsigned NumSubElts = SubVT.getVectorNumElements();
1332 for (unsigned i = 0; i != NumSubVecs; ++i) {
1333 APInt DemandedSubElts =
1334 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1335 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1336 Known2, TLO, Depth + 1))
1337 return true;
1338 // Known bits are shared by every demanded subvector element.
1339 if (!!DemandedSubElts)
1340 Known = Known.intersectWith(Known2);
1341 }
1342 break;
1343 }
1344 case ISD::VECTOR_SHUFFLE: {
1345 assert(!VT.isScalableVector());
1346 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1347
1348 // Collect demanded elements from shuffle operands..
1349 APInt DemandedLHS, DemandedRHS;
1350 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1351 DemandedRHS))
1352 break;
1353
1354 if (!!DemandedLHS || !!DemandedRHS) {
1355 SDValue Op0 = Op.getOperand(0);
1356 SDValue Op1 = Op.getOperand(1);
1357
1358 Known.Zero.setAllBits();
1359 Known.One.setAllBits();
1360 if (!!DemandedLHS) {
1361 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1362 Depth + 1))
1363 return true;
1364 Known = Known.intersectWith(Known2);
1365 }
1366 if (!!DemandedRHS) {
1367 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1368 Depth + 1))
1369 return true;
1370 Known = Known.intersectWith(Known2);
1371 }
1372
1373 // Attempt to avoid multi-use ops if we don't need anything from them.
1374 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1375 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1376 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1377 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1378 if (DemandedOp0 || DemandedOp1) {
1379 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1380 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1381 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1382 return TLO.CombineTo(Op, NewOp);
1383 }
1384 }
1385 break;
1386 }
1387 case ISD::AND: {
1388 SDValue Op0 = Op.getOperand(0);
1389 SDValue Op1 = Op.getOperand(1);
1390
1391 // If the RHS is a constant, check to see if the LHS would be zero without
1392 // using the bits from the RHS. Below, we use knowledge about the RHS to
1393 // simplify the LHS, here we're using information from the LHS to simplify
1394 // the RHS.
1395 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
1396 // Do not increment Depth here; that can cause an infinite loop.
1397 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1398 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1399 if ((LHSKnown.Zero & DemandedBits) ==
1400 (~RHSC->getAPIntValue() & DemandedBits))
1401 return TLO.CombineTo(Op, Op0);
1402
1403 // If any of the set bits in the RHS are known zero on the LHS, shrink
1404 // the constant.
1405 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1406 DemandedElts, TLO))
1407 return true;
1408
1409 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1410 // constant, but if this 'and' is only clearing bits that were just set by
1411 // the xor, then this 'and' can be eliminated by shrinking the mask of
1412 // the xor. For example, for a 32-bit X:
1413 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1414 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1415 LHSKnown.One == ~RHSC->getAPIntValue()) {
1416 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1417 return TLO.CombineTo(Op, Xor);
1418 }
1419 }
1420
1421 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1422 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1423 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1424 (Op0.getOperand(0).isUndef() ||
1426 Op0->hasOneUse()) {
1427 unsigned NumSubElts =
1429 unsigned SubIdx = Op0.getConstantOperandVal(2);
1430 APInt DemandedSub =
1431 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1432 KnownBits KnownSubMask =
1433 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1434 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1435 SDValue NewAnd =
1436 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1437 SDValue NewInsert =
1438 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1439 Op0.getOperand(1), Op0.getOperand(2));
1440 return TLO.CombineTo(Op, NewInsert);
1441 }
1442 }
1443
1444 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1445 Depth + 1))
1446 return true;
1447 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1448 Known2, TLO, Depth + 1))
1449 return true;
1450
1451 // If all of the demanded bits are known one on one side, return the other.
1452 // These bits cannot contribute to the result of the 'and'.
1453 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1454 return TLO.CombineTo(Op, Op0);
1455 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1456 return TLO.CombineTo(Op, Op1);
1457 // If all of the demanded bits in the inputs are known zeros, return zero.
1458 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1459 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1460 // If the RHS is a constant, see if we can simplify it.
1461 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1462 TLO))
1463 return true;
1464 // If the operation can be done in a smaller type, do so.
1465 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1466 return true;
1467
1468 // Attempt to avoid multi-use ops if we don't need anything from them.
1469 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1470 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1471 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1472 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1473 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1474 if (DemandedOp0 || DemandedOp1) {
1475 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1476 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1477 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1478 return TLO.CombineTo(Op, NewOp);
1479 }
1480 }
1481
1482 Known &= Known2;
1483 break;
1484 }
1485 case ISD::OR: {
1486 SDValue Op0 = Op.getOperand(0);
1487 SDValue Op1 = Op.getOperand(1);
1488 SDNodeFlags Flags = Op.getNode()->getFlags();
1489 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1490 Depth + 1)) {
1491 if (Flags.hasDisjoint()) {
1492 Flags.setDisjoint(false);
1493 Op->setFlags(Flags);
1494 }
1495 return true;
1496 }
1497
1498 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1499 Known2, TLO, Depth + 1)) {
1500 if (Flags.hasDisjoint()) {
1501 Flags.setDisjoint(false);
1502 Op->setFlags(Flags);
1503 }
1504 return true;
1505 }
1506
1507 // If all of the demanded bits are known zero on one side, return the other.
1508 // These bits cannot contribute to the result of the 'or'.
1509 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1510 return TLO.CombineTo(Op, Op0);
1511 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1512 return TLO.CombineTo(Op, Op1);
1513 // If the RHS is a constant, see if we can simplify it.
1514 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1515 return true;
1516 // If the operation can be done in a smaller type, do so.
1517 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1518 return true;
1519
1520 // Attempt to avoid multi-use ops if we don't need anything from them.
1521 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1522 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1523 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1524 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1525 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1526 if (DemandedOp0 || DemandedOp1) {
1527 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1528 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1529 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1530 return TLO.CombineTo(Op, NewOp);
1531 }
1532 }
1533
1534 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1535 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1536 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1537 Op0->hasOneUse() && Op1->hasOneUse()) {
1538 // Attempt to match all commutations - m_c_Or would've been useful!
1539 for (int I = 0; I != 2; ++I) {
1540 SDValue X = Op.getOperand(I).getOperand(0);
1541 SDValue C1 = Op.getOperand(I).getOperand(1);
1542 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1543 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1544 if (Alt.getOpcode() == ISD::OR) {
1545 for (int J = 0; J != 2; ++J) {
1546 if (X == Alt.getOperand(J)) {
1547 SDValue Y = Alt.getOperand(1 - J);
1548 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1549 {C1, C2})) {
1550 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1551 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1552 return TLO.CombineTo(
1553 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1554 }
1555 }
1556 }
1557 }
1558 }
1559 }
1560
1561 Known |= Known2;
1562 break;
1563 }
1564 case ISD::XOR: {
1565 SDValue Op0 = Op.getOperand(0);
1566 SDValue Op1 = Op.getOperand(1);
1567
1568 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1569 Depth + 1))
1570 return true;
1571 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1572 Depth + 1))
1573 return true;
1574
1575 // If all of the demanded bits are known zero on one side, return the other.
1576 // These bits cannot contribute to the result of the 'xor'.
1577 if (DemandedBits.isSubsetOf(Known.Zero))
1578 return TLO.CombineTo(Op, Op0);
1579 if (DemandedBits.isSubsetOf(Known2.Zero))
1580 return TLO.CombineTo(Op, Op1);
1581 // If the operation can be done in a smaller type, do so.
1582 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1583 return true;
1584
1585 // If all of the unknown bits are known to be zero on one side or the other
1586 // turn this into an *inclusive* or.
1587 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1588 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1589 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1590
1591 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1592 if (C) {
1593 // If one side is a constant, and all of the set bits in the constant are
1594 // also known set on the other side, turn this into an AND, as we know
1595 // the bits will be cleared.
1596 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1597 // NB: it is okay if more bits are known than are requested
1598 if (C->getAPIntValue() == Known2.One) {
1599 SDValue ANDC =
1600 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1601 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1602 }
1603
1604 // If the RHS is a constant, see if we can change it. Don't alter a -1
1605 // constant because that's a 'not' op, and that is better for combining
1606 // and codegen.
1607 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1608 // We're flipping all demanded bits. Flip the undemanded bits too.
1609 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1610 return TLO.CombineTo(Op, New);
1611 }
1612
1613 unsigned Op0Opcode = Op0.getOpcode();
1614 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1615 if (ConstantSDNode *ShiftC =
1616 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1617 // Don't crash on an oversized shift. We can not guarantee that a
1618 // bogus shift has been simplified to undef.
1619 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1620 uint64_t ShiftAmt = ShiftC->getZExtValue();
1622 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1623 : Ones.lshr(ShiftAmt);
1624 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1625 isDesirableToCommuteXorWithShift(Op.getNode())) {
1626 // If the xor constant is a demanded mask, do a 'not' before the
1627 // shift:
1628 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1629 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1630 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1631 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1632 Op0.getOperand(1)));
1633 }
1634 }
1635 }
1636 }
1637 }
1638
1639 // If we can't turn this into a 'not', try to shrink the constant.
1640 if (!C || !C->isAllOnes())
1641 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1642 return true;
1643
1644 // Attempt to avoid multi-use ops if we don't need anything from them.
1645 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1646 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1647 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1648 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1649 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1650 if (DemandedOp0 || DemandedOp1) {
1651 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1652 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1653 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1654 return TLO.CombineTo(Op, NewOp);
1655 }
1656 }
1657
1658 Known ^= Known2;
1659 break;
1660 }
1661 case ISD::SELECT:
1662 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1663 Known, TLO, Depth + 1))
1664 return true;
1665 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1666 Known2, TLO, Depth + 1))
1667 return true;
1668
1669 // If the operands are constants, see if we can simplify them.
1670 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1671 return true;
1672
1673 // Only known if known in both the LHS and RHS.
1674 Known = Known.intersectWith(Known2);
1675 break;
1676 case ISD::VSELECT:
1677 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1678 Known, TLO, Depth + 1))
1679 return true;
1680 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1681 Known2, TLO, Depth + 1))
1682 return true;
1683
1684 // Only known if known in both the LHS and RHS.
1685 Known = Known.intersectWith(Known2);
1686 break;
1687 case ISD::SELECT_CC:
1688 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1689 Known, TLO, Depth + 1))
1690 return true;
1691 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1692 Known2, TLO, Depth + 1))
1693 return true;
1694
1695 // If the operands are constants, see if we can simplify them.
1696 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1697 return true;
1698
1699 // Only known if known in both the LHS and RHS.
1700 Known = Known.intersectWith(Known2);
1701 break;
1702 case ISD::SETCC: {
1703 SDValue Op0 = Op.getOperand(0);
1704 SDValue Op1 = Op.getOperand(1);
1705 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1706 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1707 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1708 // -1, we may be able to bypass the setcc.
1709 if (DemandedBits.isSignMask() &&
1713 // If we're testing X < 0, then this compare isn't needed - just use X!
1714 // FIXME: We're limiting to integer types here, but this should also work
1715 // if we don't care about FP signed-zero. The use of SETLT with FP means
1716 // that we don't care about NaNs.
1717 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1719 return TLO.CombineTo(Op, Op0);
1720
1721 // TODO: Should we check for other forms of sign-bit comparisons?
1722 // Examples: X <= -1, X >= 0
1723 }
1724 if (getBooleanContents(Op0.getValueType()) ==
1726 BitWidth > 1)
1727 Known.Zero.setBitsFrom(1);
1728 break;
1729 }
1730 case ISD::SHL: {
1731 SDValue Op0 = Op.getOperand(0);
1732 SDValue Op1 = Op.getOperand(1);
1733 EVT ShiftVT = Op1.getValueType();
1734
1735 if (std::optional<uint64_t> KnownSA =
1736 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1737 unsigned ShAmt = *KnownSA;
1738 if (ShAmt == 0)
1739 return TLO.CombineTo(Op, Op0);
1740
1741 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1742 // single shift. We can do this if the bottom bits (which are shifted
1743 // out) are never demanded.
1744 // TODO - support non-uniform vector amounts.
1745 if (Op0.getOpcode() == ISD::SRL) {
1746 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1747 if (std::optional<uint64_t> InnerSA =
1748 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1749 unsigned C1 = *InnerSA;
1750 unsigned Opc = ISD::SHL;
1751 int Diff = ShAmt - C1;
1752 if (Diff < 0) {
1753 Diff = -Diff;
1754 Opc = ISD::SRL;
1755 }
1756 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1757 return TLO.CombineTo(
1758 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1759 }
1760 }
1761 }
1762
1763 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1764 // are not demanded. This will likely allow the anyext to be folded away.
1765 // TODO - support non-uniform vector amounts.
1766 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1767 SDValue InnerOp = Op0.getOperand(0);
1768 EVT InnerVT = InnerOp.getValueType();
1769 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1770 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1771 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1772 SDValue NarrowShl = TLO.DAG.getNode(
1773 ISD::SHL, dl, InnerVT, InnerOp,
1774 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1775 return TLO.CombineTo(
1776 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1777 }
1778
1779 // Repeat the SHL optimization above in cases where an extension
1780 // intervenes: (shl (anyext (shr x, c1)), c2) to
1781 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1782 // aren't demanded (as above) and that the shifted upper c1 bits of
1783 // x aren't demanded.
1784 // TODO - support non-uniform vector amounts.
1785 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1786 InnerOp.hasOneUse()) {
1787 if (std::optional<uint64_t> SA2 = TLO.DAG.getValidShiftAmount(
1788 InnerOp, DemandedElts, Depth + 2)) {
1789 unsigned InnerShAmt = *SA2;
1790 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1791 DemandedBits.getActiveBits() <=
1792 (InnerBits - InnerShAmt + ShAmt) &&
1793 DemandedBits.countr_zero() >= ShAmt) {
1794 SDValue NewSA =
1795 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1796 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1797 InnerOp.getOperand(0));
1798 return TLO.CombineTo(
1799 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1800 }
1801 }
1802 }
1803 }
1804
1805 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1806 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1807 Depth + 1)) {
1808 SDNodeFlags Flags = Op.getNode()->getFlags();
1809 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1810 // Disable the nsw and nuw flags. We can no longer guarantee that we
1811 // won't wrap after simplification.
1812 Flags.setNoSignedWrap(false);
1813 Flags.setNoUnsignedWrap(false);
1814 Op->setFlags(Flags);
1815 }
1816 return true;
1817 }
1818 Known.Zero <<= ShAmt;
1819 Known.One <<= ShAmt;
1820 // low bits known zero.
1821 Known.Zero.setLowBits(ShAmt);
1822
1823 // Attempt to avoid multi-use ops if we don't need anything from them.
1824 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1825 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1826 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1827 if (DemandedOp0) {
1828 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1829 return TLO.CombineTo(Op, NewOp);
1830 }
1831 }
1832
1833 // TODO: Can we merge this fold with the one below?
1834 // Try shrinking the operation as long as the shift amount will still be
1835 // in range.
1836 if (ShAmt < DemandedBits.getActiveBits() && !VT.isVector() &&
1837 Op.getNode()->hasOneUse()) {
1838 // Search for the smallest integer type with free casts to and from
1839 // Op's type. For expedience, just check power-of-2 integer types.
1840 unsigned DemandedSize = DemandedBits.getActiveBits();
1841 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
1842 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
1843 EVT SmallVT = EVT::getIntegerVT(*TLO.DAG.getContext(), SmallVTBits);
1844 if (isNarrowingProfitable(VT, SmallVT) &&
1845 isTypeDesirableForOp(ISD::SHL, SmallVT) &&
1846 isTruncateFree(VT, SmallVT) && isZExtFree(SmallVT, VT) &&
1847 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, SmallVT))) {
1848 assert(DemandedSize <= SmallVTBits &&
1849 "Narrowed below demanded bits?");
1850 // We found a type with free casts.
1851 SDValue NarrowShl = TLO.DAG.getNode(
1852 ISD::SHL, dl, SmallVT,
1853 TLO.DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
1854 TLO.DAG.getShiftAmountConstant(ShAmt, SmallVT, dl));
1855 return TLO.CombineTo(
1856 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1857 }
1858 }
1859 }
1860
1861 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1862 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1863 // Only do this if we demand the upper half so the knownbits are correct.
1864 unsigned HalfWidth = BitWidth / 2;
1865 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1866 DemandedBits.countLeadingOnes() >= HalfWidth) {
1867 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1868 if (isNarrowingProfitable(VT, HalfVT) &&
1869 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1870 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1871 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1872 // If we're demanding the upper bits at all, we must ensure
1873 // that the upper bits of the shift result are known to be zero,
1874 // which is equivalent to the narrow shift being NUW.
1875 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1876 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1877 SDNodeFlags Flags;
1878 Flags.setNoSignedWrap(IsNSW);
1879 Flags.setNoUnsignedWrap(IsNUW);
1880 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1881 SDValue NewShiftAmt =
1882 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1883 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1884 NewShiftAmt, Flags);
1885 SDValue NewExt =
1886 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1887 return TLO.CombineTo(Op, NewExt);
1888 }
1889 }
1890 }
1891 } else {
1892 // This is a variable shift, so we can't shift the demand mask by a known
1893 // amount. But if we are not demanding high bits, then we are not
1894 // demanding those bits from the pre-shifted operand either.
1895 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1896 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1897 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1898 Depth + 1)) {
1899 SDNodeFlags Flags = Op.getNode()->getFlags();
1900 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1901 // Disable the nsw and nuw flags. We can no longer guarantee that we
1902 // won't wrap after simplification.
1903 Flags.setNoSignedWrap(false);
1904 Flags.setNoUnsignedWrap(false);
1905 Op->setFlags(Flags);
1906 }
1907 return true;
1908 }
1909 Known.resetAll();
1910 }
1911 }
1912
1913 // If we are only demanding sign bits then we can use the shift source
1914 // directly.
1915 if (std::optional<uint64_t> MaxSA =
1916 TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
1917 unsigned ShAmt = *MaxSA;
1918 unsigned NumSignBits =
1919 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1920 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1921 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1922 return TLO.CombineTo(Op, Op0);
1923 }
1924 break;
1925 }
1926 case ISD::SRL: {
1927 SDValue Op0 = Op.getOperand(0);
1928 SDValue Op1 = Op.getOperand(1);
1929 EVT ShiftVT = Op1.getValueType();
1930
1931 if (std::optional<uint64_t> KnownSA =
1932 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
1933 unsigned ShAmt = *KnownSA;
1934 if (ShAmt == 0)
1935 return TLO.CombineTo(Op, Op0);
1936
1937 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1938 // single shift. We can do this if the top bits (which are shifted out)
1939 // are never demanded.
1940 // TODO - support non-uniform vector amounts.
1941 if (Op0.getOpcode() == ISD::SHL) {
1942 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1943 if (std::optional<uint64_t> InnerSA =
1944 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1945 unsigned C1 = *InnerSA;
1946 unsigned Opc = ISD::SRL;
1947 int Diff = ShAmt - C1;
1948 if (Diff < 0) {
1949 Diff = -Diff;
1950 Opc = ISD::SHL;
1951 }
1952 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1953 return TLO.CombineTo(
1954 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1955 }
1956 }
1957 }
1958
1959 // If this is (srl (sra X, C1), ShAmt), see if we can combine this into a
1960 // single sra. We can do this if the top bits are never demanded.
1961 if (Op0.getOpcode() == ISD::SRA && Op0.hasOneUse()) {
1962 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1963 if (std::optional<uint64_t> InnerSA =
1964 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
1965 unsigned C1 = *InnerSA;
1966 // Clamp the combined shift amount if it exceeds the bit width.
1967 unsigned Combined = std::min(C1 + ShAmt, BitWidth - 1);
1968 SDValue NewSA = TLO.DAG.getConstant(Combined, dl, ShiftVT);
1969 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRA, dl, VT,
1970 Op0.getOperand(0), NewSA));
1971 }
1972 }
1973 }
1974
1975 APInt InDemandedMask = (DemandedBits << ShAmt);
1976
1977 // If the shift is exact, then it does demand the low bits (and knows that
1978 // they are zero).
1979 if (Op->getFlags().hasExact())
1980 InDemandedMask.setLowBits(ShAmt);
1981
1982 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1983 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1984 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1986 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1987 if (isNarrowingProfitable(VT, HalfVT) &&
1988 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1989 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1990 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1991 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1992 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1993 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1994 SDValue NewShiftAmt =
1995 TLO.DAG.getShiftAmountConstant(ShAmt, HalfVT, dl);
1996 SDValue NewShift =
1997 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1998 return TLO.CombineTo(
1999 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
2000 }
2001 }
2002
2003 // Compute the new bits that are at the top now.
2004 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2005 Depth + 1))
2006 return true;
2007 Known.Zero.lshrInPlace(ShAmt);
2008 Known.One.lshrInPlace(ShAmt);
2009 // High bits known zero.
2010 Known.Zero.setHighBits(ShAmt);
2011
2012 // Attempt to avoid multi-use ops if we don't need anything from them.
2013 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2014 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2015 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2016 if (DemandedOp0) {
2017 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
2018 return TLO.CombineTo(Op, NewOp);
2019 }
2020 }
2021 } else {
2022 // Use generic knownbits computation as it has support for non-uniform
2023 // shift amounts.
2024 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2025 }
2026
2027 // Try to match AVG patterns (after shift simplification).
2028 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2029 DemandedElts, Depth + 1))
2030 return TLO.CombineTo(Op, AVG);
2031
2032 break;
2033 }
2034 case ISD::SRA: {
2035 SDValue Op0 = Op.getOperand(0);
2036 SDValue Op1 = Op.getOperand(1);
2037 EVT ShiftVT = Op1.getValueType();
2038
2039 // If we only want bits that already match the signbit then we don't need
2040 // to shift.
2041 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2042 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2043 NumHiDemandedBits)
2044 return TLO.CombineTo(Op, Op0);
2045
2046 // If this is an arithmetic shift right and only the low-bit is set, we can
2047 // always convert this into a logical shr, even if the shift amount is
2048 // variable. The low bit of the shift cannot be an input sign bit unless
2049 // the shift amount is >= the size of the datatype, which is undefined.
2050 if (DemandedBits.isOne())
2051 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2052
2053 if (std::optional<uint64_t> KnownSA =
2054 TLO.DAG.getValidShiftAmount(Op, DemandedElts, Depth + 1)) {
2055 unsigned ShAmt = *KnownSA;
2056 if (ShAmt == 0)
2057 return TLO.CombineTo(Op, Op0);
2058
2059 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2060 // supports sext_inreg.
2061 if (Op0.getOpcode() == ISD::SHL) {
2062 if (std::optional<uint64_t> InnerSA =
2063 TLO.DAG.getValidShiftAmount(Op0, DemandedElts, Depth + 2)) {
2064 unsigned LowBits = BitWidth - ShAmt;
2065 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2066 if (VT.isVector())
2067 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2069
2070 if (*InnerSA == ShAmt) {
2071 if (!TLO.LegalOperations() ||
2073 return TLO.CombineTo(
2074 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2075 Op0.getOperand(0),
2076 TLO.DAG.getValueType(ExtVT)));
2077
2078 // Even if we can't convert to sext_inreg, we might be able to
2079 // remove this shift pair if the input is already sign extended.
2080 unsigned NumSignBits =
2081 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2082 if (NumSignBits > ShAmt)
2083 return TLO.CombineTo(Op, Op0.getOperand(0));
2084 }
2085 }
2086 }
2087
2088 APInt InDemandedMask = (DemandedBits << ShAmt);
2089
2090 // If the shift is exact, then it does demand the low bits (and knows that
2091 // they are zero).
2092 if (Op->getFlags().hasExact())
2093 InDemandedMask.setLowBits(ShAmt);
2094
2095 // If any of the demanded bits are produced by the sign extension, we also
2096 // demand the input sign bit.
2097 if (DemandedBits.countl_zero() < ShAmt)
2098 InDemandedMask.setSignBit();
2099
2100 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2101 Depth + 1))
2102 return true;
2103 Known.Zero.lshrInPlace(ShAmt);
2104 Known.One.lshrInPlace(ShAmt);
2105
2106 // If the input sign bit is known to be zero, or if none of the top bits
2107 // are demanded, turn this into an unsigned shift right.
2108 if (Known.Zero[BitWidth - ShAmt - 1] ||
2109 DemandedBits.countl_zero() >= ShAmt) {
2110 SDNodeFlags Flags;
2111 Flags.setExact(Op->getFlags().hasExact());
2112 return TLO.CombineTo(
2113 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2114 }
2115
2116 int Log2 = DemandedBits.exactLogBase2();
2117 if (Log2 >= 0) {
2118 // The bit must come from the sign.
2119 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2120 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2121 }
2122
2123 if (Known.One[BitWidth - ShAmt - 1])
2124 // New bits are known one.
2125 Known.One.setHighBits(ShAmt);
2126
2127 // Attempt to avoid multi-use ops if we don't need anything from them.
2128 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2129 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2130 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2131 if (DemandedOp0) {
2132 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2133 return TLO.CombineTo(Op, NewOp);
2134 }
2135 }
2136 }
2137
2138 // Try to match AVG patterns (after shift simplification).
2139 if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
2140 DemandedElts, Depth + 1))
2141 return TLO.CombineTo(Op, AVG);
2142
2143 break;
2144 }
2145 case ISD::FSHL:
2146 case ISD::FSHR: {
2147 SDValue Op0 = Op.getOperand(0);
2148 SDValue Op1 = Op.getOperand(1);
2149 SDValue Op2 = Op.getOperand(2);
2150 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2151
2152 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2153 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2154
2155 // For fshl, 0-shift returns the 1st arg.
2156 // For fshr, 0-shift returns the 2nd arg.
2157 if (Amt == 0) {
2158 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2159 Known, TLO, Depth + 1))
2160 return true;
2161 break;
2162 }
2163
2164 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2165 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2166 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2167 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2168 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2169 Depth + 1))
2170 return true;
2171 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2172 Depth + 1))
2173 return true;
2174
2175 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2176 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2177 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2178 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2179 Known = Known.unionWith(Known2);
2180
2181 // Attempt to avoid multi-use ops if we don't need anything from them.
2182 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2183 !DemandedElts.isAllOnes()) {
2184 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2185 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2186 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2187 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2188 if (DemandedOp0 || DemandedOp1) {
2189 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2190 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2191 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2192 DemandedOp1, Op2);
2193 return TLO.CombineTo(Op, NewOp);
2194 }
2195 }
2196 }
2197
2198 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2199 if (isPowerOf2_32(BitWidth)) {
2200 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2201 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2202 Known2, TLO, Depth + 1))
2203 return true;
2204 }
2205 break;
2206 }
2207 case ISD::ROTL:
2208 case ISD::ROTR: {
2209 SDValue Op0 = Op.getOperand(0);
2210 SDValue Op1 = Op.getOperand(1);
2211 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2212
2213 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2214 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2215 return TLO.CombineTo(Op, Op0);
2216
2217 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2218 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2219 unsigned RevAmt = BitWidth - Amt;
2220
2221 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2222 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2223 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2224 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2225 Depth + 1))
2226 return true;
2227
2228 // rot*(x, 0) --> x
2229 if (Amt == 0)
2230 return TLO.CombineTo(Op, Op0);
2231
2232 // See if we don't demand either half of the rotated bits.
2233 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2234 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2235 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2236 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2237 }
2238 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2239 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2240 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2241 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2242 }
2243 }
2244
2245 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2246 if (isPowerOf2_32(BitWidth)) {
2247 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2248 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2249 Depth + 1))
2250 return true;
2251 }
2252 break;
2253 }
2254 case ISD::SMIN:
2255 case ISD::SMAX:
2256 case ISD::UMIN:
2257 case ISD::UMAX: {
2258 unsigned Opc = Op.getOpcode();
2259 SDValue Op0 = Op.getOperand(0);
2260 SDValue Op1 = Op.getOperand(1);
2261
2262 // If we're only demanding signbits, then we can simplify to OR/AND node.
2263 unsigned BitOp =
2264 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2265 unsigned NumSignBits =
2266 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2267 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2268 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2269 if (NumSignBits >= NumDemandedUpperBits)
2270 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2271
2272 // Check if one arg is always less/greater than (or equal) to the other arg.
2273 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2274 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2275 switch (Opc) {
2276 case ISD::SMIN:
2277 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2278 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2279 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2280 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2281 Known = KnownBits::smin(Known0, Known1);
2282 break;
2283 case ISD::SMAX:
2284 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2285 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2286 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2287 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2288 Known = KnownBits::smax(Known0, Known1);
2289 break;
2290 case ISD::UMIN:
2291 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2292 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2293 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2294 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2295 Known = KnownBits::umin(Known0, Known1);
2296 break;
2297 case ISD::UMAX:
2298 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2299 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2300 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2301 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2302 Known = KnownBits::umax(Known0, Known1);
2303 break;
2304 }
2305 break;
2306 }
2307 case ISD::BITREVERSE: {
2308 SDValue Src = Op.getOperand(0);
2309 APInt DemandedSrcBits = DemandedBits.reverseBits();
2310 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2311 Depth + 1))
2312 return true;
2313 Known.One = Known2.One.reverseBits();
2314 Known.Zero = Known2.Zero.reverseBits();
2315 break;
2316 }
2317 case ISD::BSWAP: {
2318 SDValue Src = Op.getOperand(0);
2319
2320 // If the only bits demanded come from one byte of the bswap result,
2321 // just shift the input byte into position to eliminate the bswap.
2322 unsigned NLZ = DemandedBits.countl_zero();
2323 unsigned NTZ = DemandedBits.countr_zero();
2324
2325 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2326 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2327 // have 14 leading zeros, round to 8.
2328 NLZ = alignDown(NLZ, 8);
2329 NTZ = alignDown(NTZ, 8);
2330 // If we need exactly one byte, we can do this transformation.
2331 if (BitWidth - NLZ - NTZ == 8) {
2332 // Replace this with either a left or right shift to get the byte into
2333 // the right place.
2334 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2335 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2336 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2337 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2338 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2339 return TLO.CombineTo(Op, NewOp);
2340 }
2341 }
2342
2343 APInt DemandedSrcBits = DemandedBits.byteSwap();
2344 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2345 Depth + 1))
2346 return true;
2347 Known.One = Known2.One.byteSwap();
2348 Known.Zero = Known2.Zero.byteSwap();
2349 break;
2350 }
2351 case ISD::CTPOP: {
2352 // If only 1 bit is demanded, replace with PARITY as long as we're before
2353 // op legalization.
2354 // FIXME: Limit to scalars for now.
2355 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2356 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2357 Op.getOperand(0)));
2358
2359 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2360 break;
2361 }
2363 SDValue Op0 = Op.getOperand(0);
2364 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2365 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2366
2367 // If we only care about the highest bit, don't bother shifting right.
2368 if (DemandedBits.isSignMask()) {
2369 unsigned MinSignedBits =
2370 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2371 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2372 // However if the input is already sign extended we expect the sign
2373 // extension to be dropped altogether later and do not simplify.
2374 if (!AlreadySignExtended) {
2375 // Compute the correct shift amount type, which must be getShiftAmountTy
2376 // for scalar types after legalization.
2377 SDValue ShiftAmt =
2378 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2379 return TLO.CombineTo(Op,
2380 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2381 }
2382 }
2383
2384 // If none of the extended bits are demanded, eliminate the sextinreg.
2385 if (DemandedBits.getActiveBits() <= ExVTBits)
2386 return TLO.CombineTo(Op, Op0);
2387
2388 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2389
2390 // Since the sign extended bits are demanded, we know that the sign
2391 // bit is demanded.
2392 InputDemandedBits.setBit(ExVTBits - 1);
2393
2394 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2395 Depth + 1))
2396 return true;
2397
2398 // If the sign bit of the input is known set or clear, then we know the
2399 // top bits of the result.
2400
2401 // If the input sign bit is known zero, convert this into a zero extension.
2402 if (Known.Zero[ExVTBits - 1])
2403 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2404
2405 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2406 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2407 Known.One.setBitsFrom(ExVTBits);
2408 Known.Zero &= Mask;
2409 } else { // Input sign bit unknown
2410 Known.Zero &= Mask;
2411 Known.One &= Mask;
2412 }
2413 break;
2414 }
2415 case ISD::BUILD_PAIR: {
2416 EVT HalfVT = Op.getOperand(0).getValueType();
2417 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2418
2419 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2420 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2421
2422 KnownBits KnownLo, KnownHi;
2423
2424 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2425 return true;
2426
2427 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2428 return true;
2429
2430 Known = KnownHi.concat(KnownLo);
2431 break;
2432 }
2434 if (VT.isScalableVector())
2435 return false;
2436 [[fallthrough]];
2437 case ISD::ZERO_EXTEND: {
2438 SDValue Src = Op.getOperand(0);
2439 EVT SrcVT = Src.getValueType();
2440 unsigned InBits = SrcVT.getScalarSizeInBits();
2441 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2442 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2443
2444 // If none of the top bits are demanded, convert this into an any_extend.
2445 if (DemandedBits.getActiveBits() <= InBits) {
2446 // If we only need the non-extended bits of the bottom element
2447 // then we can just bitcast to the result.
2448 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2449 VT.getSizeInBits() == SrcVT.getSizeInBits())
2450 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2451
2452 unsigned Opc =
2454 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2455 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2456 }
2457
2458 SDNodeFlags Flags = Op->getFlags();
2459 APInt InDemandedBits = DemandedBits.trunc(InBits);
2460 APInt InDemandedElts = DemandedElts.zext(InElts);
2461 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2462 Depth + 1)) {
2463 if (Flags.hasNonNeg()) {
2464 Flags.setNonNeg(false);
2465 Op->setFlags(Flags);
2466 }
2467 return true;
2468 }
2469 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2470 Known = Known.zext(BitWidth);
2471
2472 // Attempt to avoid multi-use ops if we don't need anything from them.
2473 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2474 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2475 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2476 break;
2477 }
2479 if (VT.isScalableVector())
2480 return false;
2481 [[fallthrough]];
2482 case ISD::SIGN_EXTEND: {
2483 SDValue Src = Op.getOperand(0);
2484 EVT SrcVT = Src.getValueType();
2485 unsigned InBits = SrcVT.getScalarSizeInBits();
2486 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2487 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2488
2489 APInt InDemandedElts = DemandedElts.zext(InElts);
2490 APInt InDemandedBits = DemandedBits.trunc(InBits);
2491
2492 // Since some of the sign extended bits are demanded, we know that the sign
2493 // bit is demanded.
2494 InDemandedBits.setBit(InBits - 1);
2495
2496 // If none of the top bits are demanded, convert this into an any_extend.
2497 if (DemandedBits.getActiveBits() <= InBits) {
2498 // If we only need the non-extended bits of the bottom element
2499 // then we can just bitcast to the result.
2500 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2501 VT.getSizeInBits() == SrcVT.getSizeInBits())
2502 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2503
2504 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2506 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2507 InBits) {
2508 unsigned Opc =
2510 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2511 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2512 }
2513 }
2514
2515 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2516 Depth + 1))
2517 return true;
2518 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2519
2520 // If the sign bit is known one, the top bits match.
2521 Known = Known.sext(BitWidth);
2522
2523 // If the sign bit is known zero, convert this to a zero extend.
2524 if (Known.isNonNegative()) {
2525 unsigned Opc =
2527 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2528 SDNodeFlags Flags;
2529 if (!IsVecInReg)
2530 Flags.setNonNeg(true);
2531 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2532 }
2533 }
2534
2535 // Attempt to avoid multi-use ops if we don't need anything from them.
2536 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2537 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2538 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2539 break;
2540 }
2542 if (VT.isScalableVector())
2543 return false;
2544 [[fallthrough]];
2545 case ISD::ANY_EXTEND: {
2546 SDValue Src = Op.getOperand(0);
2547 EVT SrcVT = Src.getValueType();
2548 unsigned InBits = SrcVT.getScalarSizeInBits();
2549 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2550 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2551
2552 // If we only need the bottom element then we can just bitcast.
2553 // TODO: Handle ANY_EXTEND?
2554 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2555 VT.getSizeInBits() == SrcVT.getSizeInBits())
2556 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2557
2558 APInt InDemandedBits = DemandedBits.trunc(InBits);
2559 APInt InDemandedElts = DemandedElts.zext(InElts);
2560 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2561 Depth + 1))
2562 return true;
2563 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2564 Known = Known.anyext(BitWidth);
2565
2566 // Attempt to avoid multi-use ops if we don't need anything from them.
2567 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2568 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2569 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2570 break;
2571 }
2572 case ISD::TRUNCATE: {
2573 SDValue Src = Op.getOperand(0);
2574
2575 // Simplify the input, using demanded bit information, and compute the known
2576 // zero/one bits live out.
2577 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2578 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2579 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2580 Depth + 1))
2581 return true;
2582 Known = Known.trunc(BitWidth);
2583
2584 // Attempt to avoid multi-use ops if we don't need anything from them.
2585 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2586 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2587 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2588
2589 // If the input is only used by this truncate, see if we can shrink it based
2590 // on the known demanded bits.
2591 switch (Src.getOpcode()) {
2592 default:
2593 break;
2594 case ISD::SRL:
2595 // Shrink SRL by a constant if none of the high bits shifted in are
2596 // demanded.
2597 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2598 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2599 // undesirable.
2600 break;
2601
2602 if (Src.getNode()->hasOneUse()) {
2603 if (isTruncateFree(Src, VT) &&
2604 !isTruncateFree(Src.getValueType(), VT)) {
2605 // If truncate is only free at trunc(srl), do not turn it into
2606 // srl(trunc). The check is done by first check the truncate is free
2607 // at Src's opcode(srl), then check the truncate is not done by
2608 // referencing sub-register. In test, if both trunc(srl) and
2609 // srl(trunc)'s trunc are free, srl(trunc) performs better. If only
2610 // trunc(srl)'s trunc is free, trunc(srl) is better.
2611 break;
2612 }
2613
2614 std::optional<uint64_t> ShAmtC =
2615 TLO.DAG.getValidShiftAmount(Src, DemandedElts, Depth + 2);
2616 if (!ShAmtC || *ShAmtC >= BitWidth)
2617 break;
2618 uint64_t ShVal = *ShAmtC;
2619
2620 APInt HighBits =
2621 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2622 HighBits.lshrInPlace(ShVal);
2623 HighBits = HighBits.trunc(BitWidth);
2624 if (!(HighBits & DemandedBits)) {
2625 // None of the shifted in bits are needed. Add a truncate of the
2626 // shift input, then shift it.
2627 SDValue NewShAmt = TLO.DAG.getShiftAmountConstant(ShVal, VT, dl);
2628 SDValue NewTrunc =
2629 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2630 return TLO.CombineTo(
2631 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2632 }
2633 }
2634 break;
2635 }
2636
2637 break;
2638 }
2639 case ISD::AssertZext: {
2640 // AssertZext demands all of the high bits, plus any of the low bits
2641 // demanded by its users.
2642 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2644 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2645 TLO, Depth + 1))
2646 return true;
2647
2648 Known.Zero |= ~InMask;
2649 Known.One &= (~Known.Zero);
2650 break;
2651 }
2653 SDValue Src = Op.getOperand(0);
2654 SDValue Idx = Op.getOperand(1);
2655 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2656 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2657
2658 if (SrcEltCnt.isScalable())
2659 return false;
2660
2661 // Demand the bits from every vector element without a constant index.
2662 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2663 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2664 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2665 if (CIdx->getAPIntValue().ult(NumSrcElts))
2666 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2667
2668 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2669 // anything about the extended bits.
2670 APInt DemandedSrcBits = DemandedBits;
2671 if (BitWidth > EltBitWidth)
2672 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2673
2674 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2675 Depth + 1))
2676 return true;
2677
2678 // Attempt to avoid multi-use ops if we don't need anything from them.
2679 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2680 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2681 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2682 SDValue NewOp =
2683 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2684 return TLO.CombineTo(Op, NewOp);
2685 }
2686 }
2687
2688 Known = Known2;
2689 if (BitWidth > EltBitWidth)
2690 Known = Known.anyext(BitWidth);
2691 break;
2692 }
2693 case ISD::BITCAST: {
2694 if (VT.isScalableVector())
2695 return false;
2696 SDValue Src = Op.getOperand(0);
2697 EVT SrcVT = Src.getValueType();
2698 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2699
2700 // If this is an FP->Int bitcast and if the sign bit is the only
2701 // thing demanded, turn this into a FGETSIGN.
2702 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2703 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2704 SrcVT.isFloatingPoint()) {
2705 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2706 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2707 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2708 SrcVT != MVT::f128) {
2709 // Cannot eliminate/lower SHL for f128 yet.
2710 EVT Ty = OpVTLegal ? VT : MVT::i32;
2711 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2712 // place. We expect the SHL to be eliminated by other optimizations.
2713 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2714 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2715 if (!OpVTLegal && OpVTSizeInBits > 32)
2716 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2717 unsigned ShVal = Op.getValueSizeInBits() - 1;
2718 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2719 return TLO.CombineTo(Op,
2720 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2721 }
2722 }
2723
2724 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2725 // Demand the elt/bit if any of the original elts/bits are demanded.
2726 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2727 unsigned Scale = BitWidth / NumSrcEltBits;
2728 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2729 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2730 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2731 for (unsigned i = 0; i != Scale; ++i) {
2732 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2733 unsigned BitOffset = EltOffset * NumSrcEltBits;
2734 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2735 if (!Sub.isZero()) {
2736 DemandedSrcBits |= Sub;
2737 for (unsigned j = 0; j != NumElts; ++j)
2738 if (DemandedElts[j])
2739 DemandedSrcElts.setBit((j * Scale) + i);
2740 }
2741 }
2742
2743 APInt KnownSrcUndef, KnownSrcZero;
2744 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2745 KnownSrcZero, TLO, Depth + 1))
2746 return true;
2747
2748 KnownBits KnownSrcBits;
2749 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2750 KnownSrcBits, TLO, Depth + 1))
2751 return true;
2752 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2753 // TODO - bigendian once we have test coverage.
2754 unsigned Scale = NumSrcEltBits / BitWidth;
2755 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2756 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2757 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2758 for (unsigned i = 0; i != NumElts; ++i)
2759 if (DemandedElts[i]) {
2760 unsigned Offset = (i % Scale) * BitWidth;
2761 DemandedSrcBits.insertBits(DemandedBits, Offset);
2762 DemandedSrcElts.setBit(i / Scale);
2763 }
2764
2765 if (SrcVT.isVector()) {
2766 APInt KnownSrcUndef, KnownSrcZero;
2767 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2768 KnownSrcZero, TLO, Depth + 1))
2769 return true;
2770 }
2771
2772 KnownBits KnownSrcBits;
2773 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2774 KnownSrcBits, TLO, Depth + 1))
2775 return true;
2776
2777 // Attempt to avoid multi-use ops if we don't need anything from them.
2778 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2779 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2780 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2781 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2782 return TLO.CombineTo(Op, NewOp);
2783 }
2784 }
2785 }
2786
2787 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2788 // recursive call where Known may be useful to the caller.
2789 if (Depth > 0) {
2790 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2791 return false;
2792 }
2793 break;
2794 }
2795 case ISD::MUL:
2796 if (DemandedBits.isPowerOf2()) {
2797 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2798 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2799 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2800 unsigned CTZ = DemandedBits.countr_zero();
2801 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2802 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2803 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2804 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2805 return TLO.CombineTo(Op, Shl);
2806 }
2807 }
2808 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2809 // X * X is odd iff X is odd.
2810 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2811 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2812 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2813 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2814 return TLO.CombineTo(Op, And1);
2815 }
2816 [[fallthrough]];
2817 case ISD::ADD:
2818 case ISD::SUB: {
2819 // Add, Sub, and Mul don't demand any bits in positions beyond that
2820 // of the highest bit demanded of them.
2821 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2822 SDNodeFlags Flags = Op.getNode()->getFlags();
2823 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2824 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2825 KnownBits KnownOp0, KnownOp1;
2826 auto GetDemandedBitsLHSMask = [&](APInt Demanded,
2827 const KnownBits &KnownRHS) {
2828 if (Op.getOpcode() == ISD::MUL)
2829 Demanded.clearHighBits(KnownRHS.countMinTrailingZeros());
2830 return Demanded;
2831 };
2832 if (SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2833 Depth + 1) ||
2834 SimplifyDemandedBits(Op0, GetDemandedBitsLHSMask(LoMask, KnownOp1),
2835 DemandedElts, KnownOp0, TLO, Depth + 1) ||
2836 // See if the operation should be performed at a smaller bit width.
2837 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2838 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2839 // Disable the nsw and nuw flags. We can no longer guarantee that we
2840 // won't wrap after simplification.
2841 Flags.setNoSignedWrap(false);
2842 Flags.setNoUnsignedWrap(false);
2843 Op->setFlags(Flags);
2844 }
2845 return true;
2846 }
2847
2848 // neg x with only low bit demanded is simply x.
2849 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2850 isNullConstant(Op0))
2851 return TLO.CombineTo(Op, Op1);
2852
2853 // Attempt to avoid multi-use ops if we don't need anything from them.
2854 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2855 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2856 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2857 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2858 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2859 if (DemandedOp0 || DemandedOp1) {
2860 Flags.setNoSignedWrap(false);
2861 Flags.setNoUnsignedWrap(false);
2862 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2863 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2864 SDValue NewOp =
2865 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2866 return TLO.CombineTo(Op, NewOp);
2867 }
2868 }
2869
2870 // If we have a constant operand, we may be able to turn it into -1 if we
2871 // do not demand the high bits. This can make the constant smaller to
2872 // encode, allow more general folding, or match specialized instruction
2873 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2874 // is probably not useful (and could be detrimental).
2876 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2877 if (C && !C->isAllOnes() && !C->isOne() &&
2878 (C->getAPIntValue() | HighMask).isAllOnes()) {
2879 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2880 // Disable the nsw and nuw flags. We can no longer guarantee that we
2881 // won't wrap after simplification.
2882 Flags.setNoSignedWrap(false);
2883 Flags.setNoUnsignedWrap(false);
2884 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2885 return TLO.CombineTo(Op, NewOp);
2886 }
2887
2888 // Match a multiply with a disguised negated-power-of-2 and convert to a
2889 // an equivalent shift-left amount.
2890 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2891 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2892 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2893 return 0;
2894
2895 // Don't touch opaque constants. Also, ignore zero and power-of-2
2896 // multiplies. Those will get folded later.
2897 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2898 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2899 !MulC->getAPIntValue().isPowerOf2()) {
2900 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2901 if (UnmaskedC.isNegatedPowerOf2())
2902 return (-UnmaskedC).logBase2();
2903 }
2904 return 0;
2905 };
2906
2907 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2908 unsigned ShlAmt) {
2909 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2910 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2911 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2912 return TLO.CombineTo(Op, Res);
2913 };
2914
2916 if (Op.getOpcode() == ISD::ADD) {
2917 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2918 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2919 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2920 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2921 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2922 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2923 }
2924 if (Op.getOpcode() == ISD::SUB) {
2925 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2926 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2927 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2928 }
2929 }
2930
2931 if (Op.getOpcode() == ISD::MUL) {
2932 Known = KnownBits::mul(KnownOp0, KnownOp1);
2933 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2935 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2936 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2937 }
2938 break;
2939 }
2940 default:
2941 // We also ask the target about intrinsics (which could be specific to it).
2942 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2943 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2944 // TODO: Probably okay to remove after audit; here to reduce change size
2945 // in initial enablement patch for scalable vectors
2946 if (Op.getValueType().isScalableVector())
2947 break;
2948 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2949 Known, TLO, Depth))
2950 return true;
2951 break;
2952 }
2953
2954 // Just use computeKnownBits to compute output bits.
2955 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2956 break;
2957 }
2958
2959 // If we know the value of all of the demanded bits, return this as a
2960 // constant.
2961 if (!isTargetCanonicalConstantNode(Op) &&
2962 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2963 // Avoid folding to a constant if any OpaqueConstant is involved.
2964 const SDNode *N = Op.getNode();
2965 for (SDNode *Op :
2967 if (auto *C = dyn_cast<ConstantSDNode>(Op))
2968 if (C->isOpaque())
2969 return false;
2970 }
2971 if (VT.isInteger())
2972 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2973 if (VT.isFloatingPoint())
2974 return TLO.CombineTo(
2975 Op, TLO.DAG.getConstantFP(APFloat(VT.getFltSemantics(), Known.One),
2976 dl, VT));
2977 }
2978
2979 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2980 // Try again just for the original demanded elts.
2981 // Ensure we do this AFTER constant folding above.
2982 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2983 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2984
2985 return false;
2986}
2987
2989 const APInt &DemandedElts,
2990 DAGCombinerInfo &DCI) const {
2991 SelectionDAG &DAG = DCI.DAG;
2992 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2993 !DCI.isBeforeLegalizeOps());
2994
2995 APInt KnownUndef, KnownZero;
2996 bool Simplified =
2997 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2998 if (Simplified) {
2999 DCI.AddToWorklist(Op.getNode());
3000 DCI.CommitTargetLoweringOpt(TLO);
3001 }
3002
3003 return Simplified;
3004}
3005
3006/// Given a vector binary operation and known undefined elements for each input
3007/// operand, compute whether each element of the output is undefined.
3009 const APInt &UndefOp0,
3010 const APInt &UndefOp1) {
3011 EVT VT = BO.getValueType();
3013 "Vector binop only");
3014
3015 EVT EltVT = VT.getVectorElementType();
3016 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
3017 assert(UndefOp0.getBitWidth() == NumElts &&
3018 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
3019
3020 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
3021 const APInt &UndefVals) {
3022 if (UndefVals[Index])
3023 return DAG.getUNDEF(EltVT);
3024
3025 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
3026 // Try hard to make sure that the getNode() call is not creating temporary
3027 // nodes. Ignore opaque integers because they do not constant fold.
3028 SDValue Elt = BV->getOperand(Index);
3029 auto *C = dyn_cast<ConstantSDNode>(Elt);
3030 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
3031 return Elt;
3032 }
3033
3034 return SDValue();
3035 };
3036
3037 APInt KnownUndef = APInt::getZero(NumElts);
3038 for (unsigned i = 0; i != NumElts; ++i) {
3039 // If both inputs for this element are either constant or undef and match
3040 // the element type, compute the constant/undef result for this element of
3041 // the vector.
3042 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
3043 // not handle FP constants. The code within getNode() should be refactored
3044 // to avoid the danger of creating a bogus temporary node here.
3045 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3046 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3047 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3048 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3049 KnownUndef.setBit(i);
3050 }
3051 return KnownUndef;
3052}
3053
3055 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3056 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3057 bool AssumeSingleUse) const {
3058 EVT VT = Op.getValueType();
3059 unsigned Opcode = Op.getOpcode();
3060 APInt DemandedElts = OriginalDemandedElts;
3061 unsigned NumElts = DemandedElts.getBitWidth();
3062 assert(VT.isVector() && "Expected vector op");
3063
3064 KnownUndef = KnownZero = APInt::getZero(NumElts);
3065
3066 if (!shouldSimplifyDemandedVectorElts(Op, TLO))
3067 return false;
3068
3069 // TODO: For now we assume we know nothing about scalable vectors.
3070 if (VT.isScalableVector())
3071 return false;
3072
3073 assert(VT.getVectorNumElements() == NumElts &&
3074 "Mask size mismatches value type element count!");
3075
3076 // Undef operand.
3077 if (Op.isUndef()) {
3078 KnownUndef.setAllBits();
3079 return false;
3080 }
3081
3082 // If Op has other users, assume that all elements are needed.
3083 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3084 DemandedElts.setAllBits();
3085
3086 // Not demanding any elements from Op.
3087 if (DemandedElts == 0) {
3088 KnownUndef.setAllBits();
3089 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3090 }
3091
3092 // Limit search depth.
3094 return false;
3095
3096 SDLoc DL(Op);
3097 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3098 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3099
3100 // Helper for demanding the specified elements and all the bits of both binary
3101 // operands.
3102 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3103 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3104 TLO.DAG, Depth + 1);
3105 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3106 TLO.DAG, Depth + 1);
3107 if (NewOp0 || NewOp1) {
3108 SDValue NewOp =
3109 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3110 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3111 return TLO.CombineTo(Op, NewOp);
3112 }
3113 return false;
3114 };
3115
3116 switch (Opcode) {
3117 case ISD::SCALAR_TO_VECTOR: {
3118 if (!DemandedElts[0]) {
3119 KnownUndef.setAllBits();
3120 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3121 }
3122 SDValue ScalarSrc = Op.getOperand(0);
3123 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3124 SDValue Src = ScalarSrc.getOperand(0);
3125 SDValue Idx = ScalarSrc.getOperand(1);
3126 EVT SrcVT = Src.getValueType();
3127
3128 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3129
3130 if (SrcEltCnt.isScalable())
3131 return false;
3132
3133 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3134 if (isNullConstant(Idx)) {
3135 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3136 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3137 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3138 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3139 TLO, Depth + 1))
3140 return true;
3141 }
3142 }
3143 KnownUndef.setHighBits(NumElts - 1);
3144 break;
3145 }
3146 case ISD::BITCAST: {
3147 SDValue Src = Op.getOperand(0);
3148 EVT SrcVT = Src.getValueType();
3149
3150 // We only handle vectors here.
3151 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3152 if (!SrcVT.isVector())
3153 break;
3154
3155 // Fast handling of 'identity' bitcasts.
3156 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3157 if (NumSrcElts == NumElts)
3158 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3159 KnownZero, TLO, Depth + 1);
3160
3161 APInt SrcDemandedElts, SrcZero, SrcUndef;
3162
3163 // Bitcast from 'large element' src vector to 'small element' vector, we
3164 // must demand a source element if any DemandedElt maps to it.
3165 if ((NumElts % NumSrcElts) == 0) {
3166 unsigned Scale = NumElts / NumSrcElts;
3167 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3168 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3169 TLO, Depth + 1))
3170 return true;
3171
3172 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3173 // of the large element.
3174 // TODO - bigendian once we have test coverage.
3175 if (IsLE) {
3176 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3177 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3178 for (unsigned i = 0; i != NumElts; ++i)
3179 if (DemandedElts[i]) {
3180 unsigned Ofs = (i % Scale) * EltSizeInBits;
3181 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3182 }
3183
3184 KnownBits Known;
3185 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3186 TLO, Depth + 1))
3187 return true;
3188
3189 // The bitcast has split each wide element into a number of
3190 // narrow subelements. We have just computed the Known bits
3191 // for wide elements. See if element splitting results in
3192 // some subelements being zero. Only for demanded elements!
3193 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3194 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3195 .isAllOnes())
3196 continue;
3197 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3198 unsigned Elt = Scale * SrcElt + SubElt;
3199 if (DemandedElts[Elt])
3200 KnownZero.setBit(Elt);
3201 }
3202 }
3203 }
3204
3205 // If the src element is zero/undef then all the output elements will be -
3206 // only demanded elements are guaranteed to be correct.
3207 for (unsigned i = 0; i != NumSrcElts; ++i) {
3208 if (SrcDemandedElts[i]) {
3209 if (SrcZero[i])
3210 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3211 if (SrcUndef[i])
3212 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3213 }
3214 }
3215 }
3216
3217 // Bitcast from 'small element' src vector to 'large element' vector, we
3218 // demand all smaller source elements covered by the larger demanded element
3219 // of this vector.
3220 if ((NumSrcElts % NumElts) == 0) {
3221 unsigned Scale = NumSrcElts / NumElts;
3222 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3223 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3224 TLO, Depth + 1))
3225 return true;
3226
3227 // If all the src elements covering an output element are zero/undef, then
3228 // the output element will be as well, assuming it was demanded.
3229 for (unsigned i = 0; i != NumElts; ++i) {
3230 if (DemandedElts[i]) {
3231 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3232 KnownZero.setBit(i);
3233 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3234 KnownUndef.setBit(i);
3235 }
3236 }
3237 }
3238 break;
3239 }
3240 case ISD::FREEZE: {
3241 SDValue N0 = Op.getOperand(0);
3242 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3243 /*PoisonOnly=*/false))
3244 return TLO.CombineTo(Op, N0);
3245
3246 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3247 // freeze(op(x, ...)) -> op(freeze(x), ...).
3248 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3249 return TLO.CombineTo(
3251 TLO.DAG.getFreeze(N0.getOperand(0))));
3252 break;
3253 }
3254 case ISD::BUILD_VECTOR: {
3255 // Check all elements and simplify any unused elements with UNDEF.
3256 if (!DemandedElts.isAllOnes()) {
3257 // Don't simplify BROADCASTS.
3258 if (llvm::any_of(Op->op_values(),
3259 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3260 SmallVector<SDValue, 32> Ops(Op->ops());
3261 bool Updated = false;
3262 for (unsigned i = 0; i != NumElts; ++i) {
3263 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3264 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3265 KnownUndef.setBit(i);
3266 Updated = true;
3267 }
3268 }
3269 if (Updated)
3270 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3271 }
3272 }
3273 for (unsigned i = 0; i != NumElts; ++i) {
3274 SDValue SrcOp = Op.getOperand(i);
3275 if (SrcOp.isUndef()) {
3276 KnownUndef.setBit(i);
3277 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3279 KnownZero.setBit(i);
3280 }
3281 }
3282 break;
3283 }
3284 case ISD::CONCAT_VECTORS: {
3285 EVT SubVT = Op.getOperand(0).getValueType();
3286 unsigned NumSubVecs = Op.getNumOperands();
3287 unsigned NumSubElts = SubVT.getVectorNumElements();
3288 for (unsigned i = 0; i != NumSubVecs; ++i) {
3289 SDValue SubOp = Op.getOperand(i);
3290 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3291 APInt SubUndef, SubZero;
3292 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3293 Depth + 1))
3294 return true;
3295 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3296 KnownZero.insertBits(SubZero, i * NumSubElts);
3297 }
3298
3299 // Attempt to avoid multi-use ops if we don't need anything from them.
3300 if (!DemandedElts.isAllOnes()) {
3301 bool FoundNewSub = false;
3302 SmallVector<SDValue, 2> DemandedSubOps;
3303 for (unsigned i = 0; i != NumSubVecs; ++i) {
3304 SDValue SubOp = Op.getOperand(i);
3305 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3306 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3307 SubOp, SubElts, TLO.DAG, Depth + 1);
3308 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3309 FoundNewSub = NewSubOp ? true : FoundNewSub;
3310 }
3311 if (FoundNewSub) {
3312 SDValue NewOp =
3313 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3314 return TLO.CombineTo(Op, NewOp);
3315 }
3316 }
3317 break;
3318 }
3319 case ISD::INSERT_SUBVECTOR: {
3320 // Demand any elements from the subvector and the remainder from the src its
3321 // inserted into.
3322 SDValue Src = Op.getOperand(0);
3323 SDValue Sub = Op.getOperand(1);
3324 uint64_t Idx = Op.getConstantOperandVal(2);
3325 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3326 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3327 APInt DemandedSrcElts = DemandedElts;
3328 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3329
3330 APInt SubUndef, SubZero;
3331 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3332 Depth + 1))
3333 return true;
3334
3335 // If none of the src operand elements are demanded, replace it with undef.
3336 if (!DemandedSrcElts && !Src.isUndef())
3337 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3338 TLO.DAG.getUNDEF(VT), Sub,
3339 Op.getOperand(2)));
3340
3341 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3342 TLO, Depth + 1))
3343 return true;
3344 KnownUndef.insertBits(SubUndef, Idx);
3345 KnownZero.insertBits(SubZero, Idx);
3346
3347 // Attempt to avoid multi-use ops if we don't need anything from them.
3348 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3349 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3350 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3351 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3352 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3353 if (NewSrc || NewSub) {
3354 NewSrc = NewSrc ? NewSrc : Src;
3355 NewSub = NewSub ? NewSub : Sub;
3356 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3357 NewSub, Op.getOperand(2));
3358 return TLO.CombineTo(Op, NewOp);
3359 }
3360 }
3361 break;
3362 }
3364 // Offset the demanded elts by the subvector index.
3365 SDValue Src = Op.getOperand(0);
3366 if (Src.getValueType().isScalableVector())
3367 break;
3368 uint64_t Idx = Op.getConstantOperandVal(1);
3369 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3370 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3371
3372 APInt SrcUndef, SrcZero;
3373 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3374 Depth + 1))
3375 return true;
3376 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3377 KnownZero = SrcZero.extractBits(NumElts, Idx);
3378
3379 // Attempt to avoid multi-use ops if we don't need anything from them.
3380 if (!DemandedElts.isAllOnes()) {
3381 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3382 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3383 if (NewSrc) {
3384 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3385 Op.getOperand(1));
3386 return TLO.CombineTo(Op, NewOp);
3387 }
3388 }
3389 break;
3390 }
3392 SDValue Vec = Op.getOperand(0);
3393 SDValue Scl = Op.getOperand(1);
3394 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3395
3396 // For a legal, constant insertion index, if we don't need this insertion
3397 // then strip it, else remove it from the demanded elts.
3398 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3399 unsigned Idx = CIdx->getZExtValue();
3400 if (!DemandedElts[Idx])
3401 return TLO.CombineTo(Op, Vec);
3402
3403 APInt DemandedVecElts(DemandedElts);
3404 DemandedVecElts.clearBit(Idx);
3405 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3406 KnownZero, TLO, Depth + 1))
3407 return true;
3408
3409 KnownUndef.setBitVal(Idx, Scl.isUndef());
3410
3411 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3412 break;
3413 }
3414
3415 APInt VecUndef, VecZero;
3416 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3417 Depth + 1))
3418 return true;
3419 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3420 break;
3421 }
3422 case ISD::VSELECT: {
3423 SDValue Sel = Op.getOperand(0);
3424 SDValue LHS = Op.getOperand(1);
3425 SDValue RHS = Op.getOperand(2);
3426
3427 // Try to transform the select condition based on the current demanded
3428 // elements.
3429 APInt UndefSel, ZeroSel;
3430 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3431 Depth + 1))
3432 return true;
3433
3434 // See if we can simplify either vselect operand.
3435 APInt DemandedLHS(DemandedElts);
3436 APInt DemandedRHS(DemandedElts);
3437 APInt UndefLHS, ZeroLHS;
3438 APInt UndefRHS, ZeroRHS;
3439 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3440 Depth + 1))
3441 return true;
3442 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3443 Depth + 1))
3444 return true;
3445
3446 KnownUndef = UndefLHS & UndefRHS;
3447 KnownZero = ZeroLHS & ZeroRHS;
3448
3449 // If we know that the selected element is always zero, we don't need the
3450 // select value element.
3451 APInt DemandedSel = DemandedElts & ~KnownZero;
3452 if (DemandedSel != DemandedElts)
3453 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3454 Depth + 1))
3455 return true;
3456
3457 break;
3458 }
3459 case ISD::VECTOR_SHUFFLE: {
3460 SDValue LHS = Op.getOperand(0);
3461 SDValue RHS = Op.getOperand(1);
3462 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3463
3464 // Collect demanded elements from shuffle operands..
3465 APInt DemandedLHS(NumElts, 0);
3466 APInt DemandedRHS(NumElts, 0);
3467 for (unsigned i = 0; i != NumElts; ++i) {
3468 int M = ShuffleMask[i];
3469 if (M < 0 || !DemandedElts[i])
3470 continue;
3471 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3472 if (M < (int)NumElts)
3473 DemandedLHS.setBit(M);
3474 else
3475 DemandedRHS.setBit(M - NumElts);
3476 }
3477
3478 // See if we can simplify either shuffle operand.
3479 APInt UndefLHS, ZeroLHS;
3480 APInt UndefRHS, ZeroRHS;
3481 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3482 Depth + 1))
3483 return true;
3484 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3485 Depth + 1))
3486 return true;
3487
3488 // Simplify mask using undef elements from LHS/RHS.
3489 bool Updated = false;
3490 bool IdentityLHS = true, IdentityRHS = true;
3491 SmallVector<int, 32> NewMask(ShuffleMask);
3492 for (unsigned i = 0; i != NumElts; ++i) {
3493 int &M = NewMask[i];
3494 if (M < 0)
3495 continue;
3496 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3497 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3498 Updated = true;
3499 M = -1;
3500 }
3501 IdentityLHS &= (M < 0) || (M == (int)i);
3502 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3503 }
3504
3505 // Update legal shuffle masks based on demanded elements if it won't reduce
3506 // to Identity which can cause premature removal of the shuffle mask.
3507 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3508 SDValue LegalShuffle =
3509 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3510 if (LegalShuffle)
3511 return TLO.CombineTo(Op, LegalShuffle);
3512 }
3513
3514 // Propagate undef/zero elements from LHS/RHS.
3515 for (unsigned i = 0; i != NumElts; ++i) {
3516 int M = ShuffleMask[i];
3517 if (M < 0) {
3518 KnownUndef.setBit(i);
3519 } else if (M < (int)NumElts) {
3520 if (UndefLHS[M])
3521 KnownUndef.setBit(i);
3522 if (ZeroLHS[M])
3523 KnownZero.setBit(i);
3524 } else {
3525 if (UndefRHS[M - NumElts])
3526 KnownUndef.setBit(i);
3527 if (ZeroRHS[M - NumElts])
3528 KnownZero.setBit(i);
3529 }
3530 }
3531 break;
3532 }
3536 APInt SrcUndef, SrcZero;
3537 SDValue Src = Op.getOperand(0);
3538 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3539 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3540 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3541 Depth + 1))
3542 return true;
3543 KnownZero = SrcZero.zextOrTrunc(NumElts);
3544 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3545
3546 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3547 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3548 DemandedSrcElts == 1) {
3549 // aext - if we just need the bottom element then we can bitcast.
3550 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3551 }
3552
3553 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3554 // zext(undef) upper bits are guaranteed to be zero.
3555 if (DemandedElts.isSubsetOf(KnownUndef))
3556 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3557 KnownUndef.clearAllBits();
3558
3559 // zext - if we just need the bottom element then we can mask:
3560 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3561 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3562 Op->isOnlyUserOf(Src.getNode()) &&
3563 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3564 SDLoc DL(Op);
3565 EVT SrcVT = Src.getValueType();
3566 EVT SrcSVT = SrcVT.getScalarType();
3567 SmallVector<SDValue> MaskElts;
3568 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3569 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3570 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3571 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3572 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3573 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3574 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3575 }
3576 }
3577 }
3578 break;
3579 }
3580
3581 // TODO: There are more binop opcodes that could be handled here - MIN,
3582 // MAX, saturated math, etc.
3583 case ISD::ADD: {
3584 SDValue Op0 = Op.getOperand(0);
3585 SDValue Op1 = Op.getOperand(1);
3586 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3587 APInt UndefLHS, ZeroLHS;
3588 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3589 Depth + 1, /*AssumeSingleUse*/ true))
3590 return true;
3591 }
3592 [[fallthrough]];
3593 }
3594 case ISD::AVGCEILS:
3595 case ISD::AVGCEILU:
3596 case ISD::AVGFLOORS:
3597 case ISD::AVGFLOORU:
3598 case ISD::OR:
3599 case ISD::XOR:
3600 case ISD::SUB:
3601 case ISD::FADD:
3602 case ISD::FSUB:
3603 case ISD::FMUL:
3604 case ISD::FDIV:
3605 case ISD::FREM: {
3606 SDValue Op0 = Op.getOperand(0);
3607 SDValue Op1 = Op.getOperand(1);
3608
3609 APInt UndefRHS, ZeroRHS;
3610 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3611 Depth + 1))
3612 return true;
3613 APInt UndefLHS, ZeroLHS;
3614 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3615 Depth + 1))
3616 return true;
3617
3618 KnownZero = ZeroLHS & ZeroRHS;
3619 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3620
3621 // Attempt to avoid multi-use ops if we don't need anything from them.
3622 // TODO - use KnownUndef to relax the demandedelts?
3623 if (!DemandedElts.isAllOnes())
3624 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3625 return true;
3626 break;
3627 }
3628 case ISD::SHL:
3629 case ISD::SRL:
3630 case ISD::SRA:
3631 case ISD::ROTL:
3632 case ISD::ROTR: {
3633 SDValue Op0 = Op.getOperand(0);
3634 SDValue Op1 = Op.getOperand(1);
3635
3636 APInt UndefRHS, ZeroRHS;
3637 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3638 Depth + 1))
3639 return true;
3640 APInt UndefLHS, ZeroLHS;
3641 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3642 Depth + 1))
3643 return true;
3644
3645 KnownZero = ZeroLHS;
3646 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3647
3648 // Attempt to avoid multi-use ops if we don't need anything from them.
3649 // TODO - use KnownUndef to relax the demandedelts?
3650 if (!DemandedElts.isAllOnes())
3651 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3652 return true;
3653 break;
3654 }
3655 case ISD::MUL:
3656 case ISD::MULHU:
3657 case ISD::MULHS:
3658 case ISD::AND: {
3659 SDValue Op0 = Op.getOperand(0);
3660 SDValue Op1 = Op.getOperand(1);
3661
3662 APInt SrcUndef, SrcZero;
3663 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3664 Depth + 1))
3665 return true;
3666 // If we know that a demanded element was zero in Op1 we don't need to
3667 // demand it in Op0 - its guaranteed to be zero.
3668 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3669 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3670 TLO, Depth + 1))
3671 return true;
3672
3673 KnownUndef &= DemandedElts0;
3674 KnownZero &= DemandedElts0;
3675
3676 // If every element pair has a zero/undef then just fold to zero.
3677 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3678 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3679 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3680 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3681
3682 // If either side has a zero element, then the result element is zero, even
3683 // if the other is an UNDEF.
3684 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3685 // and then handle 'and' nodes with the rest of the binop opcodes.
3686 KnownZero |= SrcZero;
3687 KnownUndef &= SrcUndef;
3688 KnownUndef &= ~KnownZero;
3689
3690 // Attempt to avoid multi-use ops if we don't need anything from them.
3691 if (!DemandedElts.isAllOnes())
3692 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3693 return true;
3694 break;
3695 }
3696 case ISD::TRUNCATE:
3697 case ISD::SIGN_EXTEND:
3698 case ISD::ZERO_EXTEND:
3699 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3700 KnownZero, TLO, Depth + 1))
3701 return true;
3702
3703 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3704 // zext(undef) upper bits are guaranteed to be zero.
3705 if (DemandedElts.isSubsetOf(KnownUndef))
3706 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3707 KnownUndef.clearAllBits();
3708 }
3709 break;
3710 default: {
3711 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3712 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3713 KnownZero, TLO, Depth))
3714 return true;
3715 } else {
3716 KnownBits Known;
3717 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3718 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3719 TLO, Depth, AssumeSingleUse))
3720 return true;
3721 }
3722 break;
3723 }
3724 }
3725 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3726
3727 // Constant fold all undef cases.
3728 // TODO: Handle zero cases as well.
3729 if (DemandedElts.isSubsetOf(KnownUndef))
3730 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3731
3732 return false;
3733}
3734
3735/// Determine which of the bits specified in Mask are known to be either zero or
3736/// one and return them in the Known.
// NOTE(review): the opening signature line (original line 3737) is missing
// from this extraction; per the assert text below this is the default
// computeKnownBitsForTargetNode hook — TODO confirm against upstream.
3738                                                   KnownBits &Known,
3739                                                   const APInt &DemandedElts,
3740                                                   const SelectionDAG &DAG,
3741                                                   unsigned Depth) const {
// Only target-specific opcodes (or target intrinsics) may reach this hook;
// generic nodes must use the regular analysis named in the assert message.
3742  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3743          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3744          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3745          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3746         "Should use MaskedValueIsZero if you don't know whether Op"
3747         " is a target node!");
// Conservative default: report that no bits are known.
3748  Known.resetAll();
3749}
3750
// NOTE(review): the signature head (original lines 3751-3752) is missing from
// this extraction. Default known-bits hook taking a MachineRegisterInfo
// (GlobalISel path); conservatively reports nothing known.
3753    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3754    unsigned Depth) const {
3755  Known.resetAll();
3756}
3757
// NOTE(review): the signature head (original line 3758) is missing from this
// extraction. Derives known-zero low bits of a frame-index address from the
// stack object's alignment.
3759    const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3760  // The low bits are known zero if the pointer is aligned.
3761  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3762}
3763
// NOTE(review): the signature head (original lines 3764-3765) is missing from
// this extraction. Default alignment hook: conservatively report 1-byte
// alignment, i.e. no alignment information.
3766                                                  unsigned Depth) const {
3767  return Align(1);
3768}
3769
3770/// This method can be implemented by targets that want to expose additional
3771/// information about sign bits to the DAG Combiner.
// NOTE(review): the opening signature line (original line 3772) is missing
// from this extraction; per the assert text this is
// ComputeNumSignBitsForTargetNode — TODO confirm against upstream.
3773                                                         const APInt &,
3774                                                         const SelectionDAG &,
3775                                                         unsigned Depth) const {
3776  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3777          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3778          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3779          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3780         "Should use ComputeNumSignBits if you don't know whether Op"
3781         " is a target node!");
// Conservative default: every value has at least its one sign bit.
3782  return 1;
3783}
3784
// NOTE(review): the signature head (original line 3785) is missing from this
// extraction. GlobalISel counterpart of the sign-bit hook above; conservative
// default of a single sign bit.
3786    GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3787    const MachineRegisterInfo &MRI, unsigned Depth) const {
3788  return 1;
3789}
3790
// NOTE(review): the signature head (original line 3791) is missing from this
// extraction; per the assert text this is the default
// SimplifyDemandedVectorEltsForTargetNode hook.
3792    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3793    TargetLoweringOpt &TLO, unsigned Depth) const {
3794  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3795          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3796          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3797          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3798         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3799         " is a target node!");
// Default: no target-specific simplification was performed.
3800  return false;
3801}
3802
// NOTE(review): the signature head (original line 3803) is missing from this
// extraction; per the assert text this is the default
// SimplifyDemandedBitsForTargetNode hook.
3804    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3805    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3806  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3807          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3808          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3809          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3810         "Should use SimplifyDemandedBits if you don't know whether Op"
3811         " is a target node!");
// Default: perform no simplification, but still populate Known via the
// target's known-bits hook so callers get whatever information exists.
3812  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3813  return false;
3814}
3815
// NOTE(review): the signature head (original line 3816) is missing from this
// extraction; per the assert text this is the default
// SimplifyMultipleUseDemandedBitsForTargetNode hook.
3817    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3818    SelectionDAG &DAG, unsigned Depth) const {
3819  assert(
3820      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3821       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3822       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3823       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3824      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3825      " is a target node!");
// Default: no alternative (simpler) value is available.
3826  return SDValue();
3827}
3828
// Build a VECTOR_SHUFFLE only if the mask (possibly after commuting the
// operands) is legal for the target; otherwise return an empty SDValue.
// NOTE(review): original lines 3830-3831 (rest of the signature) and 3836
// are missing from this extraction. The operand swap at 3835 is presumably
// paired with a mask commutation on the missing line 3836 — verify against
// upstream before editing.
3829SDValue
3832                                        SelectionDAG &DAG) const {
3833  bool LegalMask = isShuffleMaskLegal(Mask, VT);
3834  if (!LegalMask) {
// Try the commuted form: swap the two inputs and re-check legality.
3835    std::swap(N0, N1);
3837    LegalMask = isShuffleMaskLegal(Mask, VT);
3838  }
3839
3840  if (!LegalMask)
3841    return SDValue();
3842
3843  return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3844}
3845
// NOTE(review): the signature line (original 3846) is missing from this
// extraction. Default: no target constant can be recovered from this load.
3847  return nullptr;
3848}
3849
// NOTE(review): the signature head (original line 3850) is missing from this
// extraction; per the assert text this is the default
// isGuaranteedNotToBeUndefOrPoisonForTargetNode hook.
3851    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3852    bool PoisonOnly, unsigned Depth) const {
3853  assert(
3854      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3855       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3856       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3857       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3858      "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3859      " is a target node!");
3860
3861  // If Op can't create undef/poison and none of its operands are undef/poison
3862  // then Op is never undef/poison.
3863  return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3864                                              /*ConsiderFlags*/ true, Depth) &&
3865         all_of(Op->ops(), [&](SDValue V) {
3866           return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3867                                                       Depth + 1);
3868         });
3869}
3870
// NOTE(review): the signature head (original line 3871) is missing from this
// extraction; per the assert text this is the default
// canCreateUndefOrPoisonForTargetNode hook.
3872    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3873    bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3874  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3875          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3876          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3877          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3878         "Should use canCreateUndefOrPoison if you don't know whether Op"
3879         " is a target node!");
3880  // Be conservative and return true.
3881  return true;
3882}
3883
// NOTE(review): the signature head (original line 3884) is missing from this
// extraction; per the assert text this is the default
// isKnownNeverNaNForTargetNode hook.
3885                                                  const SelectionDAG &DAG,
3886                                                  bool SNaN,
3887                                                  unsigned Depth) const {
3888  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3889          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3890          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3891          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3892         "Should use isKnownNeverNaN if you don't know whether Op"
3893         " is a target node!");
// Conservative default: cannot prove the value is never NaN.
3894  return false;
3895}
3896
// NOTE(review): the signature head (original line 3897) is missing from this
// extraction; per the assert text this is the default isSplatValueForTargetNode
// hook.
3898                                               const APInt &DemandedElts,
3899                                               APInt &UndefElts,
3900                                               const SelectionDAG &DAG,
3901                                               unsigned Depth) const {
3902  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3903          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3904          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3905          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3906         "Should use isSplatValue if you don't know whether Op"
3907         " is a target node!");
// Conservative default: cannot prove the value is a splat.
3908  return false;
3909}
3910
3911// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3912// work with truncating build vectors and vectors with elements of less than
3913// 8 bits.
// Decide whether N is the target's canonical "true" constant (scalar or
// splat), according to the boolean-contents policy for its type.
// NOTE(review): the signature line (original 3914) and the three switch case
// labels (original 3933, 3935, 3937 — presumably the BooleanContent
// enumerators) are missing from this extraction; verify against upstream.
3915  if (!N)
3916    return false;
3917
3918  unsigned EltWidth;
3919  APInt CVal;
3920  if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3921                                               /*AllowTruncation=*/true)) {
3922    CVal = CN->getAPIntValue();
3923    EltWidth = N.getValueType().getScalarSizeInBits();
3924  } else
3925    return false;
3926
3927  // If this is a truncating splat, truncate the splat value.
3928  // Otherwise, we may fail to match the expected values below.
3929  if (EltWidth < CVal.getBitWidth())
3930    CVal = CVal.trunc(EltWidth);
3931
3932  switch (getBooleanContents(N.getValueType())) {
// Undefined contents: only the low bit matters.
3934    return CVal[0];
// Zero-or-one contents: true is exactly 1.
3936    return CVal.isOne();
// Zero-or-negative-one contents: true is all-ones.
3938    return CVal.isAllOnes();
3939  }
3940
3941  llvm_unreachable("Invalid boolean contents");
3942}
3943
// Decide whether N is the target's canonical "false" constant (scalar or
// constant splat), according to the boolean-contents policy for its type.
// NOTE(review): the signature line (original 3944) is missing from this
// extraction.
3945  if (!N)
3946    return false;
3947
3948  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3949  if (!CN) {
3950    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3951    if (!BV)
3952      return false;
3953
3954    // Only interested in constant splats, we don't care about undef
3955    // elements in identifying boolean constants and getConstantSplatNode
3956    // returns NULL if all ops are undef;
3957    CN = BV->getConstantSplatNode();
3958    if (!CN)
3959      return false;
3960  }
3961
// With undefined boolean contents only the low bit matters: false iff LSB==0.
3962  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3963    return !CN->getAPIntValue()[0];
3964
// Otherwise false is exactly the zero constant.
3965  return CN->isZero();
3966}
3967
// Decide whether constant N represents "true" once extended from a narrower
// boolean, honoring the sign/zero-extension choice (SExt).
// NOTE(review): the signature head (original 3968), the line initializing Cnt
// (original 3973), and the switch case labels (original 3975, 3979-3980) are
// missing from this extraction; verify against upstream before editing.
3969                                       bool SExt) const {
// An i1 boolean is unextended: true is exactly 1.
3970  if (VT == MVT::i1)
3971    return N->isOne();
3972
3974  switch (Cnt) {
3976    // An extended value of 1 is always true, unless its original type is i1,
3977    // in which case it will be sign extended to -1.
3978    return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
// Zero-or-negative-one contents: true is all-ones, and only via sign-extend.
3981    return N->isAllOnes() && SExt;
3982  }
3983  llvm_unreachable("Unexpected enumeration.");
3984}
3985
3986/// This helper function of SimplifySetCC tries to optimize the comparison when
3987/// either operand of the SetCC node is a bitwise-and instruction.
// NOTE(review): original lines 4003-4004, 4027, 4050, 4056 and 4058 are
// missing from this extraction (condition continuations, the condcode argument
// of the setcc at 4026, and a line between 4055 and 4057 — presumably a
// condition inversion). Verify against upstream before editing this function.
3988SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3989                                         ISD::CondCode Cond, const SDLoc &DL,
3990                                         DAGCombinerInfo &DCI) const {
// Canonicalize so the AND (if any) sits on the LHS of the comparison.
3991  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3992    std::swap(N0, N1);
3993
3994  SelectionDAG &DAG = DCI.DAG;
3995  EVT OpVT = N0.getValueType();
// Only integer equality/inequality comparisons of an AND are handled.
3996  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3997      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3998    return SDValue();
3999
4000  // (X & Y) != 0 --> zextOrTrunc(X & Y)
4001  // iff everything but LSB is known zero:
4002  if (Cond == ISD::SETNE && isNullConstant(N1) &&
4005    unsigned NumEltBits = OpVT.getScalarSizeInBits();
4006    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
4007    if (DAG.MaskedValueIsZero(N0, UpperBits))
4008      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
4009  }
4010
4011  // Try to eliminate a power-of-2 mask constant by converting to a signbit
4012  // test in a narrow type that we can truncate to with no cost. Examples:
4013  // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
4014  // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
4015  // TODO: This conservatively checks for type legality on the source and
4016  //       destination types. That may inhibit optimizations, but it also
4017  //       allows setcc->shift transforms that may be more beneficial.
4018  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4019  if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
4020      isTypeLegal(OpVT) && N0.hasOneUse()) {
4021    EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
4022                                     AndC->getAPIntValue().getActiveBits());
4023    if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
4024      SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
4025      SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
4026      return DAG.getSetCC(DL, VT, Trunc, Zero,
4028    }
4029  }
4030
4031  // Match these patterns in any of their permutations:
4032  // (X & Y) == Y
4033  // (X & Y) != Y
4034  SDValue X, Y;
4035  if (N0.getOperand(0) == N1) {
4036    X = N0.getOperand(1);
4037    Y = N0.getOperand(0);
4038  } else if (N0.getOperand(1) == N1) {
4039    X = N0.getOperand(0);
4040    Y = N0.getOperand(1);
4041  } else {
4042    return SDValue();
4043  }
4044
4045  // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4046  // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4047  // its liable to create and infinite loop.
4048  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4049  if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4051    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4052    // Note that where Y is variable and is known to have at most one bit set
4053    // (for example, if it is Z & 1) we cannot do this; the expressions are not
4054    // equivalent when Y == 0.
4055    assert(OpVT.isInteger());
4057    if (DCI.isBeforeLegalizeOps() ||
4059      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4060  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4061    // If the target supports an 'and-not' or 'and-complement' logic operation,
4062    // try to use that to make a comparison operation more efficient.
4063    // But don't do this transform if the mask is a single bit because there are
4064    // more efficient ways to deal with that case (for example, 'bt' on x86 or
4065    // 'rlwinm' on PPC).
4066
4067    // Bail out if the compare operand that we want to turn into a zero is
4068    // already a zero (otherwise, infinite loop).
4069    if (isNullConstant(Y))
4070      return SDValue();
4071
4072    // Transform this into: ~X & Y == 0.
4073    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4074    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4075    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4076  }
4077
4078  return SDValue();
4079}
4080
4081/// There are multiple IR patterns that could be checking whether certain
4082/// truncation of a signed number would be lossy or not. The pattern which is
4083/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4084/// We are looking for the following pattern: (KeptBits is a constant)
4085///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4086/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4087/// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
4088/// We will unfold it into the natural trunc+sext pattern:
4089///   ((%x << C) a>> C) dstcond %x
4090/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
// NOTE(review): original line 4168 (an argument line of the getNode call near
// the end — presumably the SIGN_EXTEND_INREG opcode and operands) is missing
// from this extraction; verify against upstream before editing.
4091SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4092    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4093    const SDLoc &DL) const {
4094  // We must be comparing with a constant.
4095  ConstantSDNode *C1;
4096  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4097    return SDValue();
4098
4099  // N0 should be:  add %x, (1 << (KeptBits-1))
4100  if (N0->getOpcode() != ISD::ADD)
4101    return SDValue();
4102
4103  // And we must be 'add'ing a constant.
4104  ConstantSDNode *C01;
4105  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4106    return SDValue();
4107
4108  SDValue X = N0->getOperand(0);
4109  EVT XVT = X.getValueType();
4110
4111  // Validate constants ...
4112
4113  APInt I1 = C1->getAPIntValue();
4114
// Normalize the unsigned comparison to an eq/ne test; for the non-strict
// variants the comparison constant is bumped by one first.
4115  ISD::CondCode NewCond;
4116  if (Cond == ISD::CondCode::SETULT) {
4117    NewCond = ISD::CondCode::SETEQ;
4118  } else if (Cond == ISD::CondCode::SETULE) {
4119    NewCond = ISD::CondCode::SETEQ;
4120    // But need to 'canonicalize' the constant.
4121    I1 += 1;
4122  } else if (Cond == ISD::CondCode::SETUGT) {
4123    NewCond = ISD::CondCode::SETNE;
4124    // But need to 'canonicalize' the constant.
4125    I1 += 1;
4126  } else if (Cond == ISD::CondCode::SETUGE) {
4127    NewCond = ISD::CondCode::SETNE;
4128  } else
4129    return SDValue();
4130
4131  APInt I01 = C01->getAPIntValue();
4132
4133  auto checkConstants = [&I1, &I01]() -> bool {
4134    // Both of them must be power-of-two, and the constant from setcc is bigger.
4135    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4136  };
4137
4138  if (checkConstants()) {
4139    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
4140  } else {
4141    // What if we invert constants? (and the target predicate)
4142    I1.negate();
4143    I01.negate();
4144    assert(XVT.isInteger());
4145    NewCond = getSetCCInverse(NewCond, XVT);
4146    if (!checkConstants())
4147      return SDValue();
4148    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
4149  }
4150
4151  // They are power-of-two, so which bit is set?
4152  const unsigned KeptBits = I1.logBase2();
4153  const unsigned KeptBitsMinusOne = I01.logBase2();
4154
4155  // Magic!
4156  if (KeptBits != (KeptBitsMinusOne + 1))
4157    return SDValue();
4158  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4159
4160  // We don't want to do this in every single case.
4161  SelectionDAG &DAG = DCI.DAG;
4162  if (!shouldTransformSignedTruncationCheck(XVT, KeptBits))
4163    return SDValue();
4164
4165  // Unfold into:  sext_inreg(%x) cond %x
4166  // Where 'cond' will be either 'eq' or 'ne'.
4167  SDValue SExtInReg = DAG.getNode(
4169      DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4170  return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4171}
4172
4173// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
// NOTE(review): original lines 4177 (first assert head), 4206, 4212 and 4214
// (declarations of the matched constants and the target-hook call inside the
// Match lambda) are missing from this extraction; verify against upstream
// before editing.
4174SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4175    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4176    DAGCombinerInfo &DCI, const SDLoc &DL) const {
4178         "Should be a comparison with 0.");
4179  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4180         "Valid only for [in]equality comparisons.");
4181
4182  unsigned NewShiftOpcode;
4183  SDValue X, C, Y;
4184
4185  SelectionDAG &DAG = DCI.DAG;
4186
4187  // Look for '(C l>>/<< Y)'.
4188  auto Match = [&NewShiftOpcode, &X, &C, &Y, &DAG, this](SDValue V) {
4189    // The shift should be one-use.
4190    if (!V.hasOneUse())
4191      return false;
// The hoist inverts the shift direction: shl becomes srl and vice versa.
4192    unsigned OldShiftOpcode = V.getOpcode();
4193    switch (OldShiftOpcode) {
4194    case ISD::SHL:
4195      NewShiftOpcode = ISD::SRL;
4196      break;
4197    case ISD::SRL:
4198      NewShiftOpcode = ISD::SHL;
4199      break;
4200    default:
4201      return false; // must be a logical shift.
4202    }
4203    // We should be shifting a constant.
4204    // FIXME: best to use isConstantOrConstantVector().
4205    C = V.getOperand(0);
4207        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4208    if (!CC)
4209      return false;
4210    Y = V.getOperand(1);
4211
4213        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4215        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4216  };
4217
4218  // LHS of comparison should be an one-use 'and'.
4219  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4220    return SDValue();
4221
4222  X = N0.getOperand(0);
4223  SDValue Mask = N0.getOperand(1);
4224
4225  // 'and' is commutative!
4226  if (!Match(Mask)) {
4227    std::swap(X, Mask);
4228    if (!Match(Mask))
4229      return SDValue();
4230  }
4231
4232  EVT VT = X.getValueType();
4233
4234  // Produce:
4235  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4236  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4237  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4238  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4239  return T2;
4240}
4241
4242/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4243/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4244/// handle the commuted versions of these patterns.
4245SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4246 ISD::CondCode Cond, const SDLoc &DL,
4247 DAGCombinerInfo &DCI) const {
4248 unsigned BOpcode = N0.getOpcode();
4249 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4250 "Unexpected binop");
4251 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4252
4253 // (X + Y) == X --> Y == 0
4254 // (X - Y) == X --> Y == 0
4255 // (X ^ Y) == X --> Y == 0
4256 SelectionDAG &DAG = DCI.DAG;
4257 EVT OpVT = N0.getValueType();
4258 SDValue X = N0.getOperand(0);
4259 SDValue Y = N0.getOperand(1);
4260 if (X == N1)
4261 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4262
4263 if (Y != N1)
4264 return SDValue();
4265
4266 // (X + Y) == Y --> X == 0
4267 // (X ^ Y) == Y --> X == 0
4268 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4269 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4270
4271 // The shift would not be valid if the operands are boolean (i1).
4272 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4273 return SDValue();
4274
4275 // (X - Y) == Y --> X == Y << 1
4276 SDValue One = DAG.getShiftAmountConstant(1, OpVT, DL);
4277 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4278 if (!DCI.isCalledByLegalizer())
4279 DCI.AddToWorklist(YShl1.getNode());
4280 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4281}
4282
// Rewrite setcc-of-ctpop patterns into cheaper and/xor forms when ctpop is
// not fast on the target.
// NOTE(review): original lines 4283 (signature head), 4291 (continuation of
// the truncate guard), 4322 and 4350 (the lines defining the CC/CmpCond
// condition codes used by the setcc calls below) are missing from this
// extraction; verify against upstream before editing.
4284                                   SDValue N0, const APInt &C1,
4285                                   ISD::CondCode Cond, const SDLoc &dl,
4286                                   SelectionDAG &DAG) {
4287  // Look through truncs that don't change the value of a ctpop.
4288  // FIXME: Add vector support? Need to be careful with setcc result type below.
4289  SDValue CTPOP = N0;
4290  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4292    CTPOP = N0.getOperand(0);
4293
4294  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4295    return SDValue();
4296
4297  EVT CTVT = CTPOP.getValueType();
4298  SDValue CTOp = CTPOP.getOperand(0);
4299
4300  // Expand a power-of-2-or-zero comparison based on ctpop:
4301  // (ctpop x) u< 2 -> (x & x-1) == 0
4302  // (ctpop x) u> 1 -> (x & x-1) != 0
4303  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4304    // Keep the CTPOP if it is a cheap vector op.
4305    if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4306      return SDValue();
4307
4308    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4309    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4310      return SDValue();
4311    if (C1 == 0 && (Cond == ISD::SETULT))
4312      return SDValue(); // This is handled elsewhere.
4313
// Each pass clears the lowest set bit: N passes test "popcount <= N".
4314    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4315
4316    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4317    SDValue Result = CTOp;
4318    for (unsigned i = 0; i < Passes; i++) {
4319      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4320      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4321    }
4323    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4324  }
4325
4326  // Expand a power-of-2 comparison based on ctpop
4327  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4328    // Keep the CTPOP if it is cheap.
4329    if (TLI.isCtpopFast(CTVT))
4330      return SDValue();
4331
4332    SDValue Zero = DAG.getConstant(0, dl, CTVT);
4333    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4334    assert(CTVT.isInteger());
4335    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4336
4337    // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4338    // check before emitting a potentially unnecessary op.
4339    if (DAG.isKnownNeverZero(CTOp)) {
4340      // (ctpop x) == 1 --> (x & x-1) == 0
4341      // (ctpop x) != 1 --> (x & x-1) != 0
4342      SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4343      SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4344      return RHS;
4345    }
4346
4347    // (ctpop x) == 1 --> (x ^ x-1) > x-1
4348    // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4349    SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4351    return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4352  }
4353
4354  return SDValue();
4355}
4356
// Fold equality comparisons of rotated values against 0/-1: rotation does not
// change whether a value is all-zeros or all-ones, so the rotate can be peeled
// off the compared operand.
// NOTE(review): the signature head (original line 4357) is missing from this
// extraction.
4358                                   ISD::CondCode Cond, const SDLoc &dl,
4359                                   SelectionDAG &DAG) {
4360  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4361    return SDValue();
4362
4363  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4364  if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4365    return SDValue();
4366
4367  auto getRotateSource = [](SDValue X) {
4368    if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4369      return X.getOperand(0);
4370    return SDValue();
4371  };
4372
4373  // Peek through a rotated value compared against 0 or -1:
4374  // (rot X, Y) == 0/-1 --> X == 0/-1
4375  // (rot X, Y) != 0/-1 --> X != 0/-1
4376  if (SDValue R = getRotateSource(N0))
4377    return DAG.getSetCC(dl, VT, R, N1, Cond);
4378
4379  // Peek through an 'or' of a rotated value compared against 0:
4380  // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4381  // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4382  //
4383  // TODO: Add the 'and' with -1 sibling.
4384  // TODO: Recurse through a series of 'or' ops to find the rotate.
4385  EVT OpVT = N0.getValueType();
4386  if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4387    if (SDValue R = getRotateSource(N0.getOperand(0))) {
4388      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4389      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4390    }
4391    if (SDValue R = getRotateSource(N0.getOperand(1))) {
4392      SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4393      return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4394    }
4395  }
4396
4397  return SDValue();
4398}
4399
// Fold an all-bits-clear test of a funnel shift whose two inputs share an
// operand: bit order does not matter for comparison with zero, so the funnel
// shift can be replaced by a cheaper single shift plus 'or'.
// NOTE(review): the signature head (original line 4400) is missing from this
// extraction.
4401                                        ISD::CondCode Cond, const SDLoc &dl,
4402                                        SelectionDAG &DAG) {
4403  // If we are testing for all-bits-clear, we might be able to do that with
4404  // less shifting since bit-order does not matter.
4405  if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4406    return SDValue();
4407
4408  auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4409  if (!C1 || !C1->isZero())
4410    return SDValue();
4411
4412  if (!N0.hasOneUse() ||
4413      (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4414    return SDValue();
4415
4416  unsigned BitWidth = N0.getScalarValueSizeInBits();
4417  auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4418  if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4419    return SDValue();
4420
4421  // Canonicalize fshr as fshl to reduce pattern-matching.
4422  unsigned ShAmt = ShAmtC->getZExtValue();
4423  if (N0.getOpcode() == ISD::FSHR)
4424    ShAmt = BitWidth - ShAmt;
4425
4426  // Match an 'or' with a specific operand 'Other' in either commuted variant.
4427  SDValue X, Y;
4428  auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4429    if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4430      return false;
4431    if (Or.getOperand(0) == Other) {
4432      X = Or.getOperand(0);
4433      Y = Or.getOperand(1);
4434      return true;
4435    }
4436    if (Or.getOperand(1) == Other) {
4437      X = Or.getOperand(1);
4438      Y = Or.getOperand(0);
4439      return true;
4440    }
4441    return false;
4442  };
4443
4444  EVT OpVT = N0.getValueType();
4445  EVT ShAmtVT = N0.getOperand(2).getValueType();
4446  SDValue F0 = N0.getOperand(0);
4447  SDValue F1 = N0.getOperand(1);
4448  if (matchOr(F0, F1)) {
4449    // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4450    SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4451    SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4452    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4453    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4454  }
4455  if (matchOr(F1, F0)) {
4456    // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4457    SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4458    SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4459    SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4460    return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4461  }
4462
4463  return SDValue();
4464}
4465
4466/// Try to simplify a setcc built with the specified operands and cc. If it is
4467/// unable to simplify it, return a null SDValue.
4469 ISD::CondCode Cond, bool foldBooleans,
4470 DAGCombinerInfo &DCI,
4471 const SDLoc &dl) const {
4472 SelectionDAG &DAG = DCI.DAG;
4473 const DataLayout &Layout = DAG.getDataLayout();
4474 EVT OpVT = N0.getValueType();
4476
4477 // Constant fold or commute setcc.
4478 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4479 return Fold;
4480
4481 bool N0ConstOrSplat =
4482 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4483 bool N1ConstOrSplat =
4484 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4485
4486 // Canonicalize toward having the constant on the RHS.
4487 // TODO: Handle non-splat vector constants. All undef causes trouble.
4488 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4489 // infinite loop here when we encounter one.
4491 if (N0ConstOrSplat && !N1ConstOrSplat &&
4492 (DCI.isBeforeLegalizeOps() ||
4493 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4494 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4495
4496 // If we have a subtract with the same 2 non-constant operands as this setcc
4497 // -- but in reverse order -- then try to commute the operands of this setcc
4498 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4499 // instruction on some targets.
4500 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4501 (DCI.isBeforeLegalizeOps() ||
4502 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4503 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4504 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4505 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4506
4507 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4508 return V;
4509
4510 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4511 return V;
4512
4513 if (auto *N1C = isConstOrConstSplat(N1)) {
4514 const APInt &C1 = N1C->getAPIntValue();
4515
4516 // Optimize some CTPOP cases.
4517 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4518 return V;
4519
4520 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4521 // X * Y == 0 --> (X == 0) || (Y == 0)
4522 // X * Y != 0 --> (X != 0) && (Y != 0)
4523 // TODO: This bails out if minsize is set, but if the target doesn't have a
4524 // single instruction multiply for this type, it would likely be
4525 // smaller to decompose.
4526 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4527 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4528 (N0->getFlags().hasNoUnsignedWrap() ||
4529 N0->getFlags().hasNoSignedWrap()) &&
4530 !Attr.hasFnAttr(Attribute::MinSize)) {
4531 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4532 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4533 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4534 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4535 }
4536
4537 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4538 // equality comparison, then we're just comparing whether X itself is
4539 // zero.
4540 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4541 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4542 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4543 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4544 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4545 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4546 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4547 // (srl (ctlz x), 5) == 0 -> X != 0
4548 // (srl (ctlz x), 5) != 1 -> X != 0
4549 Cond = ISD::SETNE;
4550 } else {
4551 // (srl (ctlz x), 5) != 0 -> X == 0
4552 // (srl (ctlz x), 5) == 1 -> X == 0
4553 Cond = ISD::SETEQ;
4554 }
4555 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4556 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4557 Cond);
4558 }
4559 }
4560 }
4561 }
4562
4563 // FIXME: Support vectors.
4564 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4565 const APInt &C1 = N1C->getAPIntValue();
4566
4567 // (zext x) == C --> x == (trunc C)
4568 // (sext x) == C --> x == (trunc C)
4569 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4570 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4571 unsigned MinBits = N0.getValueSizeInBits();
4572 SDValue PreExt;
4573 bool Signed = false;
4574 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4575 // ZExt
4576 MinBits = N0->getOperand(0).getValueSizeInBits();
4577 PreExt = N0->getOperand(0);
4578 } else if (N0->getOpcode() == ISD::AND) {
4579 // DAGCombine turns costly ZExts into ANDs
4580 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4581 if ((C->getAPIntValue()+1).isPowerOf2()) {
4582 MinBits = C->getAPIntValue().countr_one();
4583 PreExt = N0->getOperand(0);
4584 }
4585 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4586 // SExt
4587 MinBits = N0->getOperand(0).getValueSizeInBits();
4588 PreExt = N0->getOperand(0);
4589 Signed = true;
4590 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4591 // ZEXTLOAD / SEXTLOAD
4592 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4593 MinBits = LN0->getMemoryVT().getSizeInBits();
4594 PreExt = N0;
4595 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4596 Signed = true;
4597 MinBits = LN0->getMemoryVT().getSizeInBits();
4598 PreExt = N0;
4599 }
4600 }
4601
4602 // Figure out how many bits we need to preserve this constant.
4603 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4604
4605 // Make sure we're not losing bits from the constant.
4606 if (MinBits > 0 &&
4607 MinBits < C1.getBitWidth() &&
4608 MinBits >= ReqdBits) {
4609 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4610 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4611 // Will get folded away.
4612 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4613 if (MinBits == 1 && C1 == 1)
4614 // Invert the condition.
4615 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4617 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4618 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4619 }
4620
4621 // If truncating the setcc operands is not desirable, we can still
4622 // simplify the expression in some cases:
4623 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4624 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4625 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4626 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4627 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4628 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4629 SDValue TopSetCC = N0->getOperand(0);
4630 unsigned N0Opc = N0->getOpcode();
4631 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4632 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4633 TopSetCC.getOpcode() == ISD::SETCC &&
4634 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4635 (isConstFalseVal(N1) ||
4636 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4637
4638 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4639 (!N1C->isZero() && Cond == ISD::SETNE);
4640
4641 if (!Inverse)
4642 return TopSetCC;
4643
4645 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4646 TopSetCC.getOperand(0).getValueType());
4647 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4648 TopSetCC.getOperand(1),
4649 InvCond);
4650 }
4651 }
4652 }
4653
4654 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4655 // equality or unsigned, and all 1 bits of the const are in the same
4656 // partial word, see if we can shorten the load.
4657 if (DCI.isBeforeLegalize() &&
4659 N0.getOpcode() == ISD::AND && C1 == 0 &&
4660 N0.getNode()->hasOneUse() &&
4661 isa<LoadSDNode>(N0.getOperand(0)) &&
4662 N0.getOperand(0).getNode()->hasOneUse() &&
4663 isa<ConstantSDNode>(N0.getOperand(1))) {
4664 auto *Lod = cast<LoadSDNode>(N0.getOperand(0));
4665 APInt bestMask;
4666 unsigned bestWidth = 0, bestOffset = 0;
4667 if (Lod->isSimple() && Lod->isUnindexed() &&
4668 (Lod->getMemoryVT().isByteSized() ||
4669 isPaddedAtMostSignificantBitsWhenStored(Lod->getMemoryVT()))) {
4670 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4671 unsigned origWidth = N0.getValueSizeInBits();
4672 unsigned maskWidth = origWidth;
4673 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4674 // 8 bits, but have to be careful...
4675 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4676 origWidth = Lod->getMemoryVT().getSizeInBits();
4677 const APInt &Mask = N0.getConstantOperandAPInt(1);
4678 // Only consider power-of-2 widths (and at least one byte) as candidates
4679 // for the narrowed load.
4680 for (unsigned width = 8; width < origWidth; width *= 2) {
4681 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4682 if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4683 continue;
4684 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4685 // Avoid accessing any padding here for now (we could use memWidth
4686 // instead of origWidth here otherwise).
4687 unsigned maxOffset = origWidth - width;
4688 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4689 if (Mask.isSubsetOf(newMask)) {
4690 unsigned ptrOffset =
4691 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4692 unsigned IsFast = 0;
4693 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4695 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4696 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4697 IsFast) {
4698 bestOffset = ptrOffset / 8;
4699 bestMask = Mask.lshr(offset);
4700 bestWidth = width;
4701 break;
4702 }
4703 }
4704 newMask <<= 8;
4705 }
4706 if (bestWidth)
4707 break;
4708 }
4709 }
4710 if (bestWidth) {
4711 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4712 SDValue Ptr = Lod->getBasePtr();
4713 if (bestOffset != 0)
4714 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4715 SDValue NewLoad =
4716 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4717 Lod->getPointerInfo().getWithOffset(bestOffset),
4718 Lod->getOriginalAlign());
4719 SDValue And =
4720 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4721 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4722 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4723 }
4724 }
4725
4726 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4727 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4728 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4729
4730 // If the comparison constant has bits in the upper part, the
4731 // zero-extended value could never match.
4733 C1.getBitWidth() - InSize))) {
4734 switch (Cond) {
4735 case ISD::SETUGT:
4736 case ISD::SETUGE:
4737 case ISD::SETEQ:
4738 return DAG.getConstant(0, dl, VT);
4739 case ISD::SETULT:
4740 case ISD::SETULE:
4741 case ISD::SETNE:
4742 return DAG.getConstant(1, dl, VT);
4743 case ISD::SETGT:
4744 case ISD::SETGE:
4745 // True if the sign bit of C1 is set.
4746 return DAG.getConstant(C1.isNegative(), dl, VT);
4747 case ISD::SETLT:
4748 case ISD::SETLE:
4749 // True if the sign bit of C1 isn't set.
4750 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4751 default:
4752 break;
4753 }
4754 }
4755
4756 // Otherwise, we can perform the comparison with the low bits.
4757 switch (Cond) {
4758 case ISD::SETEQ:
4759 case ISD::SETNE:
4760 case ISD::SETUGT:
4761 case ISD::SETUGE:
4762 case ISD::SETULT:
4763 case ISD::SETULE: {
4764 EVT newVT = N0.getOperand(0).getValueType();
4765 if (DCI.isBeforeLegalizeOps() ||
4766 (isOperationLegal(ISD::SETCC, newVT) &&
4767 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4768 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4769 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4770
4771 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4772 NewConst, Cond);
4773 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4774 }
4775 break;
4776 }
4777 default:
4778 break; // todo, be more careful with signed comparisons
4779 }
4780 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4781 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4782 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4783 OpVT)) {
4784 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4785 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4786 EVT ExtDstTy = N0.getValueType();
4787 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4788
4789 // If the constant doesn't fit into the number of bits for the source of
4790 // the sign extension, it is impossible for both sides to be equal.
4791 if (C1.getSignificantBits() > ExtSrcTyBits)
4792 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4793
4794 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4795 ExtDstTy != ExtSrcTy && "Unexpected types!");
4796 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4797 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4798 DAG.getConstant(Imm, dl, ExtDstTy));
4799 if (!DCI.isCalledByLegalizer())
4800 DCI.AddToWorklist(ZextOp.getNode());
4801 // Otherwise, make this a use of a zext.
4802 return DAG.getSetCC(dl, VT, ZextOp,
4803 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4804 } else if ((N1C->isZero() || N1C->isOne()) &&
4805 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4806 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4807 // excluded as they are handled below whilst checking for foldBooleans.
4808 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4809 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4810 (N0.getValueType() == MVT::i1 ||
4814 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4815 if (TrueWhenTrue)
4816 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4817 // Invert the condition.
4818 if (N0.getOpcode() == ISD::SETCC) {
4819 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4821 if (DCI.isBeforeLegalizeOps() ||
4823 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4824 }
4825 }
4826
4827 if ((N0.getOpcode() == ISD::XOR ||
4828 (N0.getOpcode() == ISD::AND &&
4829 N0.getOperand(0).getOpcode() == ISD::XOR &&
4830 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4831 isOneConstant(N0.getOperand(1))) {
4832 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4833 // can only do this if the top bits are known zero.
4834 unsigned BitWidth = N0.getValueSizeInBits();
4835 if (DAG.MaskedValueIsZero(N0,
4837 BitWidth-1))) {
4838 // Okay, get the un-inverted input value.
4839 SDValue Val;
4840 if (N0.getOpcode() == ISD::XOR) {
4841 Val = N0.getOperand(0);
4842 } else {
4843 assert(N0.getOpcode() == ISD::AND &&
4844 N0.getOperand(0).getOpcode() == ISD::XOR);
4845 // ((X^1)&1)^1 -> X & 1
4846 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4847 N0.getOperand(0).getOperand(0),
4848 N0.getOperand(1));
4849 }
4850
4851 return DAG.getSetCC(dl, VT, Val, N1,
4853 }
4854 } else if (N1C->isOne()) {
4855 SDValue Op0 = N0;
4856 if (Op0.getOpcode() == ISD::TRUNCATE)
4857 Op0 = Op0.getOperand(0);
4858
4859 if ((Op0.getOpcode() == ISD::XOR) &&
4860 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4861 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4862 SDValue XorLHS = Op0.getOperand(0);
4863 SDValue XorRHS = Op0.getOperand(1);
4864 // Ensure that the input setccs return an i1 type or 0/1 value.
4865 if (Op0.getValueType() == MVT::i1 ||
4870 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4872 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4873 }
4874 }
4875 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4876 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4877 if (Op0.getValueType().bitsGT(VT))
4878 Op0 = DAG.getNode(ISD::AND, dl, VT,
4879 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4880 DAG.getConstant(1, dl, VT));
4881 else if (Op0.getValueType().bitsLT(VT))
4882 Op0 = DAG.getNode(ISD::AND, dl, VT,
4883 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4884 DAG.getConstant(1, dl, VT));
4885
4886 return DAG.getSetCC(dl, VT, Op0,
4887 DAG.getConstant(0, dl, Op0.getValueType()),
4889 }
4890 if (Op0.getOpcode() == ISD::AssertZext &&
4891 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4892 return DAG.getSetCC(dl, VT, Op0,
4893 DAG.getConstant(0, dl, Op0.getValueType()),
4895 }
4896 }
4897
4898 // Given:
4899 // icmp eq/ne (urem %x, %y), 0
4900 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4901 // icmp eq/ne %x, 0
4902 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4903 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4904 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4905 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4906 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4907 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4908 }
4909
4910 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4911 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4912 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4913 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4914 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4915 N1C->isAllOnes()) {
4916 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4917 DAG.getConstant(0, dl, OpVT),
4919 }
4920
4921 if (SDValue V =
4922 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4923 return V;
4924 }
4925
4926 // These simplifications apply to splat vectors as well.
4927 // TODO: Handle more splat vector cases.
4928 if (auto *N1C = isConstOrConstSplat(N1)) {
4929 const APInt &C1 = N1C->getAPIntValue();
4930
4931 APInt MinVal, MaxVal;
4932 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4934 MinVal = APInt::getSignedMinValue(OperandBitSize);
4935 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4936 } else {
4937 MinVal = APInt::getMinValue(OperandBitSize);
4938 MaxVal = APInt::getMaxValue(OperandBitSize);
4939 }
4940
4941 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4942 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4943 // X >= MIN --> true
4944 if (C1 == MinVal)
4945 return DAG.getBoolConstant(true, dl, VT, OpVT);
4946
4947 if (!VT.isVector()) { // TODO: Support this for vectors.
4948 // X >= C0 --> X > (C0 - 1)
4949 APInt C = C1 - 1;
4951 if ((DCI.isBeforeLegalizeOps() ||
4952 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4953 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4954 isLegalICmpImmediate(C.getSExtValue())))) {
4955 return DAG.getSetCC(dl, VT, N0,
4956 DAG.getConstant(C, dl, N1.getValueType()),
4957 NewCC);
4958 }
4959 }
4960 }
4961
4962 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4963 // X <= MAX --> true
4964 if (C1 == MaxVal)
4965 return DAG.getBoolConstant(true, dl, VT, OpVT);
4966
4967 // X <= C0 --> X < (C0 + 1)
4968 if (!VT.isVector()) { // TODO: Support this for vectors.
4969 APInt C = C1 + 1;
4971 if ((DCI.isBeforeLegalizeOps() ||
4972 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4973 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4974 isLegalICmpImmediate(C.getSExtValue())))) {
4975 return DAG.getSetCC(dl, VT, N0,
4976 DAG.getConstant(C, dl, N1.getValueType()),
4977 NewCC);
4978 }
4979 }
4980 }
4981
4982 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4983 if (C1 == MinVal)
4984 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4985
4986 // TODO: Support this for vectors after legalize ops.
4987 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4988 // Canonicalize setlt X, Max --> setne X, Max
4989 if (C1 == MaxVal)
4990 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4991
4992 // If we have setult X, 1, turn it into seteq X, 0
4993 if (C1 == MinVal+1)
4994 return DAG.getSetCC(dl, VT, N0,
4995 DAG.getConstant(MinVal, dl, N0.getValueType()),
4996 ISD::SETEQ);
4997 }
4998 }
4999
5000 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
5001 if (C1 == MaxVal)
5002 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
5003
5004 // TODO: Support this for vectors after legalize ops.
5005 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5006 // Canonicalize setgt X, Min --> setne X, Min
5007 if (C1 == MinVal)
5008 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
5009
5010 // If we have setugt X, Max-1, turn it into seteq X, Max
5011 if (C1 == MaxVal-1)
5012 return DAG.getSetCC(dl, VT, N0,
5013 DAG.getConstant(MaxVal, dl, N0.getValueType()),
5014 ISD::SETEQ);
5015 }
5016 }
5017
5018 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
5019 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
5020 if (C1.isZero())
5021 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
5022 VT, N0, N1, Cond, DCI, dl))
5023 return CC;
5024
5025 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
5026 // For example, when high 32-bits of i64 X are known clear:
5027 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
5028 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
5029 bool CmpZero = N1C->isZero();
5030 bool CmpNegOne = N1C->isAllOnes();
5031 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
5032 // Match or(lo,shl(hi,bw/2)) pattern.
5033 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
5034 unsigned EltBits = V.getScalarValueSizeInBits();
5035 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
5036 return false;
5037 SDValue LHS = V.getOperand(0);
5038 SDValue RHS = V.getOperand(1);
5039 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
5040 // Unshifted element must have zero upperbits.
5041 if (RHS.getOpcode() == ISD::SHL &&
5042 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5043 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5044 DAG.MaskedValueIsZero(LHS, HiBits)) {
5045 Lo = LHS;
5046 Hi = RHS.getOperand(0);
5047 return true;
5048 }
5049 if (LHS.getOpcode() == ISD::SHL &&
5050 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5051 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5052 DAG.MaskedValueIsZero(RHS, HiBits)) {
5053 Lo = RHS;
5054 Hi = LHS.getOperand(0);
5055 return true;
5056 }
5057 return false;
5058 };
5059
5060 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5061 unsigned EltBits = N0.getScalarValueSizeInBits();
5062 unsigned HalfBits = EltBits / 2;
5063 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5064 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5065 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5066 SDValue NewN0 =
5067 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5068 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5069 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5070 };
5071
5072 SDValue Lo, Hi;
5073 if (IsConcat(N0, Lo, Hi))
5074 return MergeConcat(Lo, Hi);
5075
5076 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5077 SDValue Lo0, Lo1, Hi0, Hi1;
5078 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5079 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5080 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5081 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5082 }
5083 }
5084 }
5085 }
5086
5087 // If we have "setcc X, C0", check to see if we can shrink the immediate
5088 // by changing cc.
5089 // TODO: Support this for vectors after legalize ops.
5090 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5091 // SETUGT X, SINTMAX -> SETLT X, 0
5092 // SETUGE X, SINTMIN -> SETLT X, 0
5093 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5094 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5095 return DAG.getSetCC(dl, VT, N0,
5096 DAG.getConstant(0, dl, N1.getValueType()),
5097 ISD::SETLT);
5098
5099 // SETULT X, SINTMIN -> SETGT X, -1
5100 // SETULE X, SINTMAX -> SETGT X, -1
5101 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5102 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5103 return DAG.getSetCC(dl, VT, N0,
5104 DAG.getAllOnesConstant(dl, N1.getValueType()),
5105 ISD::SETGT);
5106 }
5107 }
5108
5109 // Back to non-vector simplifications.
5110 // TODO: Can we do these for vector splats?
5111 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5112 const APInt &C1 = N1C->getAPIntValue();
5113 EVT ShValTy = N0.getValueType();
5114
5115 // Fold bit comparisons when we can. This will result in an
5116 // incorrect value when boolean false is negative one, unless
5117 // the bitsize is 1 in which case the false value is the same
5118 // in practice regardless of the representation.
5119 if ((VT.getSizeInBits() == 1 ||
5121 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5122 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5123 N0.getOpcode() == ISD::AND) {
5124 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5125 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5126 // Perform the xform if the AND RHS is a single bit.
5127 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5128 if (AndRHS->getAPIntValue().isPowerOf2() &&
5129 !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5130 return DAG.getNode(
5131 ISD::TRUNCATE, dl, VT,
5132 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5133 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5134 }
5135 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5136 // (X & 8) == 8 --> (X & 8) >> 3
5137 // Perform the xform if C1 is a single bit.
5138 unsigned ShCt = C1.logBase2();
5139 if (C1.isPowerOf2() && !shouldAvoidTransformToShift(ShValTy, ShCt)) {
5140 return DAG.getNode(
5141 ISD::TRUNCATE, dl, VT,
5142 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5143 DAG.getShiftAmountConstant(ShCt, ShValTy, dl)));
5144 }
5145 }
5146 }
5147 }
5148
5149 if (C1.getSignificantBits() <= 64 &&
5151 // (X & -256) == 256 -> (X >> 8) == 1
5152 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5153 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5154 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5155 const APInt &AndRHSC = AndRHS->getAPIntValue();
5156 if (AndRHSC.isNegatedPowerOf2() && C1.isSubsetOf(AndRHSC)) {
5157 unsigned ShiftBits = AndRHSC.countr_zero();
5158 if (!shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5159 SDValue Shift = DAG.getNode(
5160 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5161 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5162 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5163 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5164 }
5165 }
5166 }
5167 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5168 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5169 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5170 // X < 0x100000000 -> (X >> 32) < 1
5171 // X >= 0x100000000 -> (X >> 32) >= 1
5172 // X <= 0x0ffffffff -> (X >> 32) < 1
5173 // X > 0x0ffffffff -> (X >> 32) >= 1
5174 unsigned ShiftBits;
5175 APInt NewC = C1;
5176 ISD::CondCode NewCond = Cond;
5177 if (AdjOne) {
5178 ShiftBits = C1.countr_one();
5179 NewC = NewC + 1;
5180 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5181 } else {
5182 ShiftBits = C1.countr_zero();
5183 }
5184 NewC.lshrInPlace(ShiftBits);
5185 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5187 !shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5188 SDValue Shift =
5189 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5190 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl));
5191 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5192 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5193 }
5194 }
5195 }
5196 }
5197
5198 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5199 auto *CFP = cast<ConstantFPSDNode>(N1);
5200 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5201
5202 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5203 // constant if knowing that the operand is non-nan is enough. We prefer to
5204 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5205 // materialize 0.0.
5206 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5207 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5208
5209 // setcc (fneg x), C -> setcc swap(pred) x, -C
5210 if (N0.getOpcode() == ISD::FNEG) {
5212 if (DCI.isBeforeLegalizeOps() ||
5213 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5214 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5215 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5216 }
5217 }
5218
5219 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5221 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5222 bool IsFabs = N0.getOpcode() == ISD::FABS;
5223 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5224 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5225 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5226 : (IsFabs ? fcInf : fcPosInf);
5227 if (Cond == ISD::SETUEQ)
5228 Flag |= fcNan;
5229 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5230 DAG.getTargetConstant(Flag, dl, MVT::i32));
5231 }
5232 }
5233
5234 // If the condition is not legal, see if we can find an equivalent one
5235 // which is legal.
5237 // If the comparison was an awkward floating-point == or != and one of
5238 // the comparison operands is infinity or negative infinity, convert the
5239 // condition to a less-awkward <= or >=.
5240 if (CFP->getValueAPF().isInfinity()) {
5241 bool IsNegInf = CFP->getValueAPF().isNegative();
5243 switch (Cond) {
5244 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5245 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5246 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5247 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5248 default: break;
5249 }
5250 if (NewCond != ISD::SETCC_INVALID &&
5251 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5252 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5253 }
5254 }
5255 }
5256
5257 if (N0 == N1) {
5258 // The sext(setcc()) => setcc() optimization relies on the appropriate
5259 // constant being emitted.
5260 assert(!N0.getValueType().isInteger() &&
5261 "Integer types should be handled by FoldSetCC");
5262
5263 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5264 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5265 if (UOF == 2) // FP operators that are undefined on NaNs.
5266 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5267 if (UOF == unsigned(EqTrue))
5268 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5269 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5270 // if it is not already.
5271 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5272 if (NewCond != Cond &&
5273 (DCI.isBeforeLegalizeOps() ||
5274 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5275 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5276 }
5277
5278 // ~X > ~Y --> Y > X
5279 // ~X < ~Y --> Y < X
5280 // ~X < C --> X > ~C
5281 // ~X > C --> X < ~C
5282 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5283 N0.getValueType().isInteger()) {
5284 if (isBitwiseNot(N0)) {
5285 if (isBitwiseNot(N1))
5286 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5287
5290 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5291 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5292 }
5293 }
5294 }
5295
5296 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5297 N0.getValueType().isInteger()) {
5298 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5299 N0.getOpcode() == ISD::XOR) {
5300 // Simplify (X+Y) == (X+Z) --> Y == Z
5301 if (N0.getOpcode() == N1.getOpcode()) {
5302 if (N0.getOperand(0) == N1.getOperand(0))
5303 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5304 if (N0.getOperand(1) == N1.getOperand(1))
5305 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5306 if (isCommutativeBinOp(N0.getOpcode())) {
5307 // If X op Y == Y op X, try other combinations.
5308 if (N0.getOperand(0) == N1.getOperand(1))
5309 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5310 Cond);
5311 if (N0.getOperand(1) == N1.getOperand(0))
5312 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5313 Cond);
5314 }
5315 }
5316
5317 // If RHS is a legal immediate value for a compare instruction, we need
5318 // to be careful about increasing register pressure needlessly.
5319 bool LegalRHSImm = false;
5320
5321 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5322 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5323 // Turn (X+C1) == C2 --> X == C2-C1
5324 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5325 return DAG.getSetCC(
5326 dl, VT, N0.getOperand(0),
5327 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5328 dl, N0.getValueType()),
5329 Cond);
5330
5331 // Turn (X^C1) == C2 --> X == C1^C2
5332 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5333 return DAG.getSetCC(
5334 dl, VT, N0.getOperand(0),
5335 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5336 dl, N0.getValueType()),
5337 Cond);
5338 }
5339
5340 // Turn (C1-X) == C2 --> X == C1-C2
5341 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5342 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5343 return DAG.getSetCC(
5344 dl, VT, N0.getOperand(1),
5345 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5346 dl, N0.getValueType()),
5347 Cond);
5348
5349 // Could RHSC fold directly into a compare?
5350 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5351 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5352 }
5353
5354 // (X+Y) == X --> Y == 0 and similar folds.
5355 // Don't do this if X is an immediate that can fold into a cmp
5356 // instruction and X+Y has other uses. It could be an induction variable
5357 // chain, and the transform would increase register pressure.
5358 if (!LegalRHSImm || N0.hasOneUse())
5359 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5360 return V;
5361 }
5362
5363 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5364 N1.getOpcode() == ISD::XOR)
5365 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5366 return V;
5367
5368 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5369 return V;
5370 }
5371
5372 // Fold remainder of division by a constant.
5373 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5374 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5375 // When division is cheap or optimizing for minimum size,
5376 // fall through to DIVREM creation by skipping this fold.
5377 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5378 if (N0.getOpcode() == ISD::UREM) {
5379 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5380 return Folded;
5381 } else if (N0.getOpcode() == ISD::SREM) {
5382 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5383 return Folded;
5384 }
5385 }
5386 }
5387
5388 // Fold away ALL boolean setcc's.
5389 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5390 SDValue Temp;
5391 switch (Cond) {
5392 default: llvm_unreachable("Unknown integer setcc!");
5393 case ISD::SETEQ: // X == Y -> ~(X^Y)
5394 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5395 N0 = DAG.getNOT(dl, Temp, OpVT);
5396 if (!DCI.isCalledByLegalizer())
5397 DCI.AddToWorklist(Temp.getNode());
5398 break;
5399 case ISD::SETNE: // X != Y --> (X^Y)
5400 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5401 break;
5402 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5403 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5404 Temp = DAG.getNOT(dl, N0, OpVT);
5405 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5406 if (!DCI.isCalledByLegalizer())
5407 DCI.AddToWorklist(Temp.getNode());
5408 break;
5409 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5410 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5411 Temp = DAG.getNOT(dl, N1, OpVT);
5412 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5413 if (!DCI.isCalledByLegalizer())
5414 DCI.AddToWorklist(Temp.getNode());
5415 break;
5416 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5417 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5418 Temp = DAG.getNOT(dl, N0, OpVT);
5419 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5420 if (!DCI.isCalledByLegalizer())
5421 DCI.AddToWorklist(Temp.getNode());
5422 break;
5423 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5424 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5425 Temp = DAG.getNOT(dl, N1, OpVT);
5426 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5427 break;
5428 }
5429 if (VT.getScalarType() != MVT::i1) {
5430 if (!DCI.isCalledByLegalizer())
5431 DCI.AddToWorklist(N0.getNode());
5432 // FIXME: If running after legalize, we probably can't do this.
5434 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5435 }
5436 return N0;
5437 }
5438
5439 // Could not fold it.
5440 return SDValue();
5441}
5442
5443/// Returns true (and the GlobalValue and the offset) if the node is a
5444/// GlobalAddress + offset.
5446 int64_t &Offset) const {
5447
5448 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5449
5450 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5451 GA = GASD->getGlobal();
5452 Offset += GASD->getOffset();
5453 return true;
5454 }
5455
5456 if (N->getOpcode() == ISD::ADD) {
5457 SDValue N1 = N->getOperand(0);
5458 SDValue N2 = N->getOperand(1);
5459 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5460 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5461 Offset += V->getSExtValue();
5462 return true;
5463 }
5464 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5465 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5466 Offset += V->getSExtValue();
5467 return true;
5468 }
5469 }
5470 }
5471
5472 return false;
5473}
5474
5476 DAGCombinerInfo &DCI) const {
5477 // Default implementation: no optimization.
5478 return SDValue();
5479}
5480
5481//===----------------------------------------------------------------------===//
5482// Inline Assembler Implementation Methods
5483//===----------------------------------------------------------------------===//
5484
5487 unsigned S = Constraint.size();
5488
5489 if (S == 1) {
5490 switch (Constraint[0]) {
5491 default: break;
5492 case 'r':
5493 return C_RegisterClass;
5494 case 'm': // memory
5495 case 'o': // offsetable
5496 case 'V': // not offsetable
5497 return C_Memory;
5498 case 'p': // Address.
5499 return C_Address;
5500 case 'n': // Simple Integer
5501 case 'E': // Floating Point Constant
5502 case 'F': // Floating Point Constant
5503 return C_Immediate;
5504 case 'i': // Simple Integer or Relocatable Constant
5505 case 's': // Relocatable Constant
5506 case 'X': // Allow ANY value.
5507 case 'I': // Target registers.
5508 case 'J':
5509 case 'K':
5510 case 'L':
5511 case 'M':
5512 case 'N':
5513 case 'O':
5514 case 'P':
5515 case '<':
5516 case '>':
5517 return C_Other;
5518 }
5519 }
5520
5521 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5522 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5523 return C_Memory;
5524 return C_Register;
5525 }
5526 return C_Unknown;
5527}
5528
5529/// Try to replace an X constraint, which matches anything, with another that
5530/// has more specific requirements based on the type of the corresponding
5531/// operand.
5532const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5533 if (ConstraintVT.isInteger())
5534 return "r";
5535 if (ConstraintVT.isFloatingPoint())
5536 return "f"; // works for many targets
5537 return nullptr;
5538}
5539
5541 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5542 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5543 return SDValue();
5544}
5545
5546/// Lower the specified operand into the Ops vector.
5547/// If it is invalid, don't add anything to Ops.
5549 StringRef Constraint,
5550 std::vector<SDValue> &Ops,
5551 SelectionDAG &DAG) const {
5552
5553 if (Constraint.size() > 1)
5554 return;
5555
5556 char ConstraintLetter = Constraint[0];
5557 switch (ConstraintLetter) {
5558 default: break;
5559 case 'X': // Allows any operand
5560 case 'i': // Simple Integer or Relocatable Constant
5561 case 'n': // Simple Integer
5562 case 's': { // Relocatable Constant
5563
5565 uint64_t Offset = 0;
5566
5567 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5568 // etc., since getelementpointer is variadic. We can't use
5569 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5570 // while in this case the GA may be furthest from the root node which is
5571 // likely an ISD::ADD.
5572 while (true) {
5573 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5574 // gcc prints these as sign extended. Sign extend value to 64 bits
5575 // now; without this it would get ZExt'd later in
5576 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5577 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5578 BooleanContent BCont = getBooleanContents(MVT::i64);
5579 ISD::NodeType ExtOpc =
5580 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5581 int64_t ExtVal =
5582 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5583 Ops.push_back(
5584 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5585 return;
5586 }
5587 if (ConstraintLetter != 'n') {
5588 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5589 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5590 GA->getValueType(0),
5591 Offset + GA->getOffset()));
5592 return;
5593 }
5594 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5595 Ops.push_back(DAG.getTargetBlockAddress(
5596 BA->getBlockAddress(), BA->getValueType(0),
5597 Offset + BA->getOffset(), BA->getTargetFlags()));
5598 return;
5599 }
5600 if (isa<BasicBlockSDNode>(Op)) {
5601 Ops.push_back(Op);
5602 return;
5603 }
5604 }
5605 const unsigned OpCode = Op.getOpcode();
5606 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5607 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5608 Op = Op.getOperand(1);
5609 // Subtraction is not commutative.
5610 else if (OpCode == ISD::ADD &&
5611 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5612 Op = Op.getOperand(0);
5613 else
5614 return;
5615 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5616 continue;
5617 }
5618 return;
5619 }
5620 break;
5621 }
5622 }
5623}
5624
5626 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5627}
5628
5629std::pair<unsigned, const TargetRegisterClass *>
5631 StringRef Constraint,
5632 MVT VT) const {
5633 if (!Constraint.starts_with("{"))
5634 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5635 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5636
5637 // Remove the braces from around the name.
5638 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5639
5640 std::pair<unsigned, const TargetRegisterClass *> R =
5641 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5642
5643 // Figure out which register class contains this reg.
5644 for (const TargetRegisterClass *RC : RI->regclasses()) {
5645 // If none of the value types for this register class are valid, we
5646 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5647 if (!isLegalRC(*RI, *RC))
5648 continue;
5649
5650 for (const MCPhysReg &PR : *RC) {
5651 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5652 std::pair<unsigned, const TargetRegisterClass *> S =
5653 std::make_pair(PR, RC);
5654
5655 // If this register class has the requested value type, return it,
5656 // otherwise keep searching and return the first class found
5657 // if no other is found which explicitly has the requested type.
5658 if (RI->isTypeLegalForClass(*RC, VT))
5659 return S;
5660 if (!R.second)
5661 R = S;
5662 }
5663 }
5664 }
5665
5666 return R;
5667}
5668
5669//===----------------------------------------------------------------------===//
5670// Constraint Selection.
5671
5672/// Return true of this is an input operand that is a matching constraint like
5673/// "4".
5675 assert(!ConstraintCode.empty() && "No known constraint!");
5676 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5677}
5678
5679/// If this is an input matching constraint, this method returns the output
5680/// operand it matches.
5682 assert(!ConstraintCode.empty() && "No known constraint!");
5683 return atoi(ConstraintCode.c_str());
5684}
5685
5686/// Split up the constraint string from the inline assembly value into the
5687/// specific constraints and their prefixes, and also tie in the associated
5688/// operand values.
5689/// If this returns an empty vector, and if the constraint string itself
5690/// isn't empty, there was an error parsing.
5693 const TargetRegisterInfo *TRI,
5694 const CallBase &Call) const {
5695 /// Information about all of the constraints.
5696 AsmOperandInfoVector ConstraintOperands;
5697 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5698 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5699
5700 // Do a prepass over the constraints, canonicalizing them, and building up the
5701 // ConstraintOperands list.
5702 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5703 unsigned ResNo = 0; // ResNo - The result number of the next output.
5704 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5705
5706 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5707 ConstraintOperands.emplace_back(std::move(CI));
5708 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5709
5710 // Update multiple alternative constraint count.
5711 if (OpInfo.multipleAlternatives.size() > maCount)
5712 maCount = OpInfo.multipleAlternatives.size();
5713
5714 OpInfo.ConstraintVT = MVT::Other;
5715
5716 // Compute the value type for each operand.
5717 switch (OpInfo.Type) {
5719 // Indirect outputs just consume an argument.
5720 if (OpInfo.isIndirect) {
5721 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5722 break;
5723 }
5724
5725 // The return value of the call is this value. As such, there is no
5726 // corresponding argument.
5727 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5728 if (auto *STy = dyn_cast<StructType>(Call.getType())) {
5729 OpInfo.ConstraintVT =
5730 getSimpleValueType(DL, STy->getElementType(ResNo));
5731 } else {
5732 assert(ResNo == 0 && "Asm only has one result!");
5733 OpInfo.ConstraintVT =
5734 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5735 }
5736 ++ResNo;
5737 break;
5738 case InlineAsm::isInput:
5739 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5740 break;
5741 case InlineAsm::isLabel:
5742 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5743 ++LabelNo;
5744 continue;
5746 // Nothing to do.
5747 break;
5748 }
5749
5750 if (OpInfo.CallOperandVal) {
5751 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5752 if (OpInfo.isIndirect) {
5753 OpTy = Call.getParamElementType(ArgNo);
5754 assert(OpTy && "Indirect operand must have elementtype attribute");
5755 }
5756
5757 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5758 if (StructType *STy = dyn_cast<StructType>(OpTy))
5759 if (STy->getNumElements() == 1)
5760 OpTy = STy->getElementType(0);
5761
5762 // If OpTy is not a single value, it may be a struct/union that we
5763 // can tile with integers.
5764 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5765 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
5766 switch (BitSize) {
5767 default: break;
5768 case 1:
5769 case 8:
5770 case 16:
5771 case 32:
5772 case 64:
5773 case 128:
5774 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
5775 break;
5776 }
5777 }
5778
5779 EVT VT = getAsmOperandValueType(DL, OpTy, true);
5780 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5781 ArgNo++;
5782 }
5783 }
5784
5785 // If we have multiple alternative constraints, select the best alternative.
5786 if (!ConstraintOperands.empty()) {
5787 if (maCount) {
5788 unsigned bestMAIndex = 0;
5789 int bestWeight = -1;
5790 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5791 int weight = -1;
5792 unsigned maIndex;
5793 // Compute the sums of the weights for each alternative, keeping track
5794 // of the best (highest weight) one so far.
5795 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5796 int weightSum = 0;
5797 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5798 cIndex != eIndex; ++cIndex) {
5799 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5800 if (OpInfo.Type == InlineAsm::isClobber)
5801 continue;
5802
5803 // If this is an output operand with a matching input operand,
5804 // look up the matching input. If their types mismatch, e.g. one
5805 // is an integer, the other is floating point, or their sizes are
5806 // different, flag it as an maCantMatch.
5807 if (OpInfo.hasMatchingInput()) {
5808 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5809 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5810 if ((OpInfo.ConstraintVT.isInteger() !=
5811 Input.ConstraintVT.isInteger()) ||
5812 (OpInfo.ConstraintVT.getSizeInBits() !=
5813 Input.ConstraintVT.getSizeInBits())) {
5814 weightSum = -1; // Can't match.
5815 break;
5816 }
5817 }
5818 }
5819 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
5820 if (weight == -1) {
5821 weightSum = -1;
5822 break;
5823 }
5824 weightSum += weight;
5825 }
5826 // Update best.
5827 if (weightSum > bestWeight) {
5828 bestWeight = weightSum;
5829 bestMAIndex = maIndex;
5830 }
5831 }
5832
5833 // Now select chosen alternative in each constraint.
5834 for (AsmOperandInfo &cInfo : ConstraintOperands)
5835 if (cInfo.Type != InlineAsm::isClobber)
5836 cInfo.selectAlternative(bestMAIndex);
5837 }
5838 }
5839
5840 // Check and hook up tied operands, choose constraint code to use.
5841 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5842 cIndex != eIndex; ++cIndex) {
5843 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5844
5845 // If this is an output operand with a matching input operand, look up the
5846 // matching input. If their types mismatch, e.g. one is an integer, the
5847 // other is floating point, or their sizes are different, flag it as an
5848 // error.
5849 if (OpInfo.hasMatchingInput()) {
5850 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5851
5852 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5853 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5854 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
5855 OpInfo.ConstraintVT);
5856 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5857 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
5858 Input.ConstraintVT);
5859 if ((OpInfo.ConstraintVT.isInteger() !=
5860 Input.ConstraintVT.isInteger()) ||
5861 (MatchRC.second != InputRC.second)) {
5862 report_fatal_error("Unsupported asm: input constraint"
5863 " with a matching output constraint of"
5864 " incompatible type!");
5865 }
5866 }
5867 }
5868 }
5869
5870 return ConstraintOperands;
5871}
5872
5873/// Return a number indicating our preference for chosing a type of constraint
5874/// over another, for the purpose of sorting them. Immediates are almost always
5875/// preferrable (when they can be emitted). A higher return value means a
5876/// stronger preference for one constraint type relative to another.
5877/// FIXME: We should prefer registers over memory but doing so may lead to
5878/// unrecoverable register exhaustion later.
5879/// https://github.com/llvm/llvm-project/issues/20571
5881 switch (CT) {
5884 return 4;
5887 return 3;
5889 return 2;
5891 return 1;
5893 return 0;
5894 }
5895 llvm_unreachable("Invalid constraint type");
5896}
5897
5898/// Examine constraint type and operand type and determine a weight value.
5899/// This object must already have been set up with the operand type
5900/// and the current alternative constraint selected.
5903 AsmOperandInfo &info, int maIndex) const {
5905 if (maIndex >= (int)info.multipleAlternatives.size())
5906 rCodes = &info.Codes;
5907 else
5908 rCodes = &info.multipleAlternatives[maIndex].Codes;
5909 ConstraintWeight BestWeight = CW_Invalid;
5910
5911 // Loop over the options, keeping track of the most general one.
5912 for (const std::string &rCode : *rCodes) {
5913 ConstraintWeight weight =
5914 getSingleConstraintMatchWeight(info, rCode.c_str());
5915 if (weight > BestWeight)
5916 BestWeight = weight;
5917 }
5918
5919 return BestWeight;
5920}
5921
5922/// Examine constraint type and operand type and determine a weight value.
5923/// This object must already have been set up with the operand type
5924/// and the current alternative constraint selected.
5927 AsmOperandInfo &info, const char *constraint) const {
5928 ConstraintWeight weight = CW_Invalid;
5929 Value *CallOperandVal = info.CallOperandVal;
5930 // If we don't have a value, we can't do a match,
5931 // but allow it at the lowest weight.
5932 if (!CallOperandVal)
5933 return CW_Default;
5934 // Look at the constraint type.
5935 switch (*constraint) {
5936 case 'i': // immediate integer.
5937 case 'n': // immediate integer with a known value.
5938 if (isa<ConstantInt>(CallOperandVal))
5939 weight = CW_Constant;
5940 break;
5941 case 's': // non-explicit intregal immediate.
5942 if (isa<GlobalValue>(CallOperandVal))
5943 weight = CW_Constant;
5944 break;
5945 case 'E': // immediate float if host format.
5946 case 'F': // immediate float.
5947 if (isa<ConstantFP>(CallOperandVal))
5948 weight = CW_Constant;
5949 break;
5950 case '<': // memory operand with autodecrement.
5951 case '>': // memory operand with autoincrement.
5952 case 'm': // memory operand.
5953 case 'o': // offsettable memory operand
5954 case 'V': // non-offsettable memory operand
5955 weight = CW_Memory;
5956 break;
5957 case 'r': // general register.
5958 case 'g': // general register, memory operand or immediate integer.
5959 // note: Clang converts "g" to "imr".
5960 if (CallOperandVal->getType()->isIntegerTy())
5961 weight = CW_Register;
5962 break;
5963 case 'X': // any operand.
5964 default:
5965 weight = CW_Default;
5966 break;
5967 }
5968 return weight;
5969}
5970
5971/// If there are multiple different constraints that we could pick for this
5972/// operand (e.g. "imr") try to pick the 'best' one.
5973/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5974/// into seven classes:
5975/// Register -> one specific register
5976/// RegisterClass -> a group of regs
5977/// Memory -> memory
5978/// Address -> a symbolic memory reference
5979/// Immediate -> immediate values
5980/// Other -> magic values (such as "Flag Output Operands")
5981/// Unknown -> something we don't recognize yet and can't handle
5982/// Ideally, we would pick the most specific constraint possible: if we have
5983/// something that fits into a register, we would pick it. The problem here
5984/// is that if we have something that could either be in a register or in
5985/// memory that use of the register could cause selection of *other*
5986/// operands to fail: they might only succeed if we pick memory. Because of
5987/// this the heuristic we use is:
5988///
5989/// 1) If there is an 'other' constraint, and if the operand is valid for
5990/// that constraint, use it. This makes us take advantage of 'i'
5991/// constraints when available.
5992/// 2) Otherwise, pick the most general constraint present. This prefers
5993/// 'm' over 'r', for example.
5994///
5996 TargetLowering::AsmOperandInfo &OpInfo) const {
5997 ConstraintGroup Ret;
5998
5999 Ret.reserve(OpInfo.Codes.size());
6000 for (StringRef Code : OpInfo.Codes) {
6001 TargetLowering::ConstraintType CType = getConstraintType(Code);
6002
6003 // Indirect 'other' or 'immediate' constraints are not allowed.
6004 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
6005 CType == TargetLowering::C_Register ||
6007 continue;
6008
6009 // Things with matching constraints can only be registers, per gcc
6010 // documentation. This mainly affects "g" constraints.
6011 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
6012 continue;
6013
6014 Ret.emplace_back(Code, CType);
6015 }
6016
6017 std::stable_sort(
6018 Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
6019 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
6020 });
6021
6022 return Ret;
6023}
6024
6025/// If we have an immediate, see if we can lower it. Return true if we can,
6026/// false otherwise.
6028 SDValue Op, SelectionDAG *DAG,
6029 const TargetLowering &TLI) {
6030
6031 assert((P.second == TargetLowering::C_Other ||
6032 P.second == TargetLowering::C_Immediate) &&
6033 "need immediate or other");
6034
6035 if (!Op.getNode())
6036 return false;
6037
6038 std::vector<SDValue> ResultOps;
6039 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6040 return !ResultOps.empty();
6041}
6042
6043/// Determines the constraint code and constraint type to use for the specific
6044/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6046 SDValue Op,
6047 SelectionDAG *DAG) const {
6048 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6049
6050 // Single-letter constraints ('r') are very common.
6051 if (OpInfo.Codes.size() == 1) {
6052 OpInfo.ConstraintCode = OpInfo.Codes[0];
6053 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6054 } else {
6055 ConstraintGroup G = getConstraintPreferences(OpInfo);
6056 if (G.empty())
6057 return;
6058
6059 unsigned BestIdx = 0;
6060 for (const unsigned E = G.size();
6061 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6062 G[BestIdx].second == TargetLowering::C_Immediate);
6063 ++BestIdx) {
6064 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6065 break;
6066 // If we're out of constraints, just pick the first one.
6067 if (BestIdx + 1 == E) {
6068 BestIdx = 0;
6069 break;
6070 }
6071 }
6072
6073 OpInfo.ConstraintCode = G[BestIdx].first;
6074 OpInfo.ConstraintType = G[BestIdx].second;
6075 }
6076
6077 // 'X' matches anything.
6078 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6079 // Constants are handled elsewhere. For Functions, the type here is the
6080 // type of the result, which is not what we want to look at; leave them
6081 // alone.
6082 Value *v = OpInfo.CallOperandVal;
6083 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6084 return;
6085 }
6086
6087 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6088 OpInfo.ConstraintCode = "i";
6089 return;
6090 }
6091
6092 // Otherwise, try to resolve it to something we know about by looking at
6093 // the actual operand type.
6094 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6095 OpInfo.ConstraintCode = Repl;
6096 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6097 }
6098 }
6099}
6100
6101/// Given an exact SDIV by a constant, create a multiplication
6102/// with the multiplicative inverse of the constant.
6103/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
6105 const SDLoc &dl, SelectionDAG &DAG,
6106 SmallVectorImpl<SDNode *> &Created) {
6107 SDValue Op0 = N->getOperand(0);
6108 SDValue Op1 = N->getOperand(1);
6109 EVT VT = N->getValueType(0);
6110 EVT SVT = VT.getScalarType();
6111 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6112 EVT ShSVT = ShVT.getScalarType();
6113
6114 bool UseSRA = false;
6115 SmallVector<SDValue, 16> Shifts, Factors;
6116
6117 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6118 if (C->isZero())
6119 return false;
6120 APInt Divisor = C->getAPIntValue();
6121 unsigned Shift = Divisor.countr_zero();
6122 if (Shift) {
6123 Divisor.ashrInPlace(Shift);
6124 UseSRA = true;
6125 }
6126 APInt Factor = Divisor.multiplicativeInverse();
6127 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6128 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6129 return true;
6130 };
6131
6132 // Collect all magic values from the build vector.
6133 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6134 return SDValue();
6135
6136 SDValue Shift, Factor;
6137 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6138 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6139 Factor = DAG.getBuildVector(VT, dl, Factors);
6140 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6141 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6142 "Expected matchUnaryPredicate to return one element for scalable "
6143 "vectors");
6144 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6145 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6146 } else {
6147 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6148 Shift = Shifts[0];
6149 Factor = Factors[0];
6150 }
6151
6152 SDValue Res = Op0;
6153 if (UseSRA) {
6154 SDNodeFlags Flags;
6155 Flags.setExact(true);
6156 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
6157 Created.push_back(Res.getNode());
6158 }
6159
6160 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6161}
6162
6163/// Given an exact UDIV by a constant, create a multiplication
6164/// with the multiplicative inverse of the constant.
6165/// Ref: "Hacker's Delight" by Henry Warren, 2nd Edition, p. 242
 /// Because the division is exact, x/D == x * inv(D) (mod 2^BW) once any
 /// common power-of-two factor has been shifted out, so no fixup is needed.
 /// All nodes created on behalf of the caller are appended to \p Created.
6167 const SDLoc &dl, SelectionDAG &DAG,
6168 SmallVectorImpl<SDNode *> &Created) {
6169 EVT VT = N->getValueType(0);
6170 EVT SVT = VT.getScalarType();
6171 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6172 EVT ShSVT = ShVT.getScalarType();
6173
 // UseSRL is set if any divisor is even: its power-of-two factor is divided
 // out first with an exact logical shift right.
6174 bool UseSRL = false;
6175 SmallVector<SDValue, 16> Shifts, Factors;
6176
 // Per-element builder: decompose each constant divisor D into D0 * 2^Shift
 // (D0 odd) and record Shift plus the multiplicative inverse of D0.
 // Returns false on a zero divisor (UB; left for constant folding elsewhere).
6177 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6178 if (C->isZero())
6179 return false;
6180 APInt Divisor = C->getAPIntValue();
6181 unsigned Shift = Divisor.countr_zero();
6182 if (Shift) {
6183 Divisor.lshrInPlace(Shift);
6184 UseSRL = true;
6185 }
6186 // Calculate the multiplicative inverse modulo BW.
 // Divisor is odd at this point, so the inverse exists.
6187 APInt Factor = Divisor.multiplicativeInverse();
6188 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6189 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6190 return true;
6191 };
6192
6193 SDValue Op1 = N->getOperand(1);
6194
6195 // Collect all magic values from the build vector.
6196 if (!ISD::matchUnaryPredicate(Op1, BuildUDIVPattern))
6197 return SDValue();
6198
 // Materialize the collected shifts/factors with the same shape as the
 // divisor operand: build-vector, splat, or plain scalar.
6199 SDValue Shift, Factor;
6200 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6201 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6202 Factor = DAG.getBuildVector(VT, dl, Factors);
6203 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6204 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6205 "Expected matchUnaryPredicate to return one element for scalable "
6206 "vectors");
6207 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6208 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6209 } else {
6210 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6211 Shift = Shifts[0];
6212 Factor = Factors[0];
6213 }
6214
6215 SDValue Res = N->getOperand(0);
6216 if (UseSRL) {
 // The shift is exact: the shifted-out low bits are known zero because the
 // original division was exact.
6217 SDNodeFlags Flags;
6218 Flags.setExact(true);
6219 Res = DAG.getNode(ISD::SRL, dl, VT, Res, Shift, Flags);
6220 Created.push_back(Res.getNode());
6221 }
6222
6223 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6224}
6225
 // Default implementation of the SDIV-by-power-of-2 target hook. Returning
 // the SDIV node itself signals the combiner to keep the plain SDIV when the
 // target reports integer division as cheap; returning SDValue() lets the
 // generic expansion run instead. Targets override this to emit custom
 // sequences.
6227 SelectionDAG &DAG,
6228 SmallVectorImpl<SDNode *> &Created) const {
 // NOTE(review): 'Attr' is presumably the enclosing function's attribute
 // list fetched from the MachineFunction just above — confirm against the
 // full source.
6230 if (isIntDivCheap(N->getValueType(0), Attr))
6231 return SDValue(N, 0); // Lower SDIV as SDIV
6232 return SDValue();
6233}
6234
 // Default implementation of the SREM-by-power-of-2 target hook, the SREM
 // counterpart of BuildSDIVPow2: keep the SREM node as-is when integer
 // division is cheap for this type, otherwise decline so the generic
 // lowering runs.
6235SDValue
6237 SelectionDAG &DAG,
6238 SmallVectorImpl<SDNode *> &Created) const {
 // NOTE(review): 'Attr' presumably holds the caller function's attributes —
 // confirm against the full source.
6240 if (isIntDivCheap(N->getValueType(0), Attr))
6241 return SDValue(N, 0); // Lower SREM as SREM
6242 return SDValue();
6243}
6244
6245/// Build sdiv by power-of-2 with conditional move instructions
6246/// Ref: "Hacker's Delight" by Henry Warren 10-1
6247/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6248/// bgez x, label
6249/// add x, x, 2**k-1
6250/// label:
6251/// sra res, x, k
6252/// neg res, res (when the divisor is negative)
 /// New nodes are appended to \p Created for the DAG combiner's worklist.
6254 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6255 SmallVectorImpl<SDNode *> &Created) const {
 // k = log2(|divisor|); Divisor is +/-2^k so its trailing zeros give k.
6256 unsigned Lg2 = Divisor.countr_zero();
6257 EVT VT = N->getValueType(0);
6258
6259 SDLoc DL(N);
6260 SDValue N0 = N->getOperand(0);
6261 SDValue Zero = DAG.getConstant(0, DL, VT);
 // Lg2Mask = 2^k - 1, the rounding bias added to negative dividends so the
 // arithmetic shift rounds toward zero instead of toward -infinity.
6262 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6263 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6264
6265 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6266 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6267 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6268 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
 // SELECT implements the conditional add; targets with cmov/csel lower it
 // branchlessly.
6269 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6270
6271 Created.push_back(Cmp.getNode());
6272 Created.push_back(Add.getNode());
6273 Created.push_back(CMov.getNode());
6274
6275 // Divide by pow2.
6276 SDValue SRA =
6277 DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6278
6279 // If we're dividing by a positive value, we're done. Otherwise, we must
6280 // negate the result.
6281 if (Divisor.isNonNegative())
6282 return SRA;
6283
6284 Created.push_back(SRA.getNode());
 // Negate via (0 - res), matching the 'neg res, res' of the comment above.
6285 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6286}
6287
6288/// Given an ISD::SDIV node expressing a divide by constant,
6289/// return a DAG expression to select that will generate the same value by
6290/// multiplying by a magic number.
6291/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
 /// The sequence is: high-part multiply by the magic constant, an optional
 /// add/subtract of the numerator, an arithmetic shift, and a sign-bit
 /// correction. All intermediate nodes are appended to \p Created.
6293 bool IsAfterLegalization,
6294 bool IsAfterLegalTypes,
6295 SmallVectorImpl<SDNode *> &Created) const {
6296 SDLoc dl(N);
6297 EVT VT = N->getValueType(0);
6298 EVT SVT = VT.getScalarType();
6299 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6300 EVT ShSVT = ShVT.getScalarType();
6301 unsigned EltBits = VT.getScalarSizeInBits();
 // MulVT is the promoted type used for the widened multiply when VT itself
 // is illegal; it stays uninitialized (and unused) for legal VT.
6302 EVT MulVT;
6303
6304 // Check to see if we can do this.
6305 // FIXME: We should be more aggressive here.
6306 if (!isTypeLegal(VT)) {
6307 // Limit this to simple scalars for now.
6308 if (VT.isVector() || !VT.isSimple())
6309 return SDValue();
6310
6311 // If this type will be promoted to a large enough type with a legal
6312 // multiply operation, we can go ahead and do this transform.
6314 return SDValue();
6315
6316 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6317 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6318 !isOperationLegal(ISD::MUL, MulVT))
6319 return SDValue();
6320 }
6321
6322 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6323 if (N->getFlags().hasExact())
6324 return BuildExactSDIV(*this, N, dl, DAG, Created);
6325
6326 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6327
 // Per-element builder: compute the signed magic-number parameters for each
 // constant divisor and collect them for later vector/scalar materialization.
6328 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6329 if (C->isZero())
6330 return false;
6331
6332 const APInt &Divisor = C->getAPIntValue();
 // NOTE(review): 'magics' is presumably the SignedDivisionByConstantInfo
 // for Divisor computed just above — confirm against the full source.
 // ShiftMask == -1 (all-ones) enables the sign-bit fixup below; 0 disables
 // it for the trivial +/-1 divisors.
6334 int NumeratorFactor = 0;
6335 int ShiftMask = -1;
6336
6337 if (Divisor.isOne() || Divisor.isAllOnes()) {
6338 // If d is +1/-1, we just multiply the numerator by +1/-1.
6339 NumeratorFactor = Divisor.getSExtValue();
6340 magics.Magic = 0;
6341 magics.ShiftAmount = 0;
6342 ShiftMask = 0;
6343 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6344 // If d > 0 and m < 0, add the numerator.
6345 NumeratorFactor = 1;
6346 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6347 // If d < 0 and m > 0, subtract the numerator.
6348 NumeratorFactor = -1;
6349 }
6350
6351 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6352 Factors.push_back(DAG.getSignedConstant(NumeratorFactor, dl, SVT));
6353 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6354 ShiftMasks.push_back(DAG.getSignedConstant(ShiftMask, dl, SVT));
6355 return true;
6356 };
6357
6358 SDValue N0 = N->getOperand(0);
6359 SDValue N1 = N->getOperand(1);
6360
6361 // Collect the shifts / magic values from each element.
6362 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6363 return SDValue();
6364
 // Materialize the collected constants with the same shape as the divisor.
6365 SDValue MagicFactor, Factor, Shift, ShiftMask;
6366 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6367 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6368 Factor = DAG.getBuildVector(VT, dl, Factors);
6369 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6370 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6371 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6372 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6373 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6374 "Expected matchUnaryPredicate to return one element for scalable "
6375 "vectors");
6376 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6377 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6378 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6379 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6380 } else {
6381 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6382 MagicFactor = MagicFactors[0];
6383 Factor = Factors[0];
6384 Shift = Shifts[0];
6385 ShiftMask = ShiftMasks[0];
6386 }
6387
6388 // Multiply the numerator (operand 0) by the magic value.
6389 // FIXME: We should support doing a MUL in a wider type.
 // GetMULHS returns the high half of the signed product X*Y, via (in order
 // of preference): the promoted MulVT path, MULHS, SMUL_LOHI, or a widened
 // MUL plus shift; SDValue() if none is available.
6390 auto GetMULHS = [&](SDValue X, SDValue Y) {
6391 // If the type isn't legal, use a wider mul of the type calculated
6392 // earlier.
6393 if (!isTypeLegal(VT)) {
6394 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6395 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6396 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6397 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6398 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6399 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6400 }
6401
6402 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6403 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6404 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6405 SDValue LoHi =
6406 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
 // Result 1 of SMUL_LOHI is the high half of the product.
6407 return SDValue(LoHi.getNode(), 1);
6408 }
6409 // If type twice as wide legal, widen and use a mul plus a shift.
6410 unsigned Size = VT.getScalarSizeInBits();
6411 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6412 if (VT.isVector())
6413 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6415 // Some targets like AMDGPU try to go from SDIV to SDIVREM which is then
6416 // custom lowered. This is very expensive so avoid it at all costs for
6417 // constant divisors.
6418 if ((!IsAfterLegalTypes && isOperationExpand(ISD::SDIV, VT) &&
6421 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6422 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6423 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6424 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6425 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6426 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6427 }
6428 return SDValue();
6429 };
6430
6431 SDValue Q = GetMULHS(N0, MagicFactor);
6432 if (!Q)
6433 return SDValue();
6434
6435 Created.push_back(Q.getNode());
6436
6437 // (Optionally) Add/subtract the numerator using Factor.
 // Factor is 0/+1/-1 per lane, so this MUL+ADD is a no-op, an add, or a
 // subtract of the numerator respectively.
6438 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6439 Created.push_back(Factor.getNode());
6440 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6441 Created.push_back(Q.getNode());
6442
6443 // Shift right algebraic by shift value.
6444 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6445 Created.push_back(Q.getNode());
6446
6447 // Extract the sign bit, mask it and add it to the quotient.
 // Adding the sign bit rounds the shifted quotient toward zero; the AND with
 // ShiftMask (0 or all-ones per lane) disables this for +/-1 divisors.
6448 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6449 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6450 Created.push_back(T.getNode());
6451 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6452 Created.push_back(T.getNode());
6453 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6454}
6455
6456/// Given an ISD::UDIV node expressing a divide by constant,
6457/// return a DAG expression to select that will generate the same value by
6458/// multiplying by a magic number.
6459/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
 /// The sequence is: optional pre-shift, unsigned high-part multiply by the
 /// magic constant, optional "NPQ" add-fixup, optional post-shift, and a
 /// final select to handle division by 1. New nodes go into \p Created.
6461 bool IsAfterLegalization,
6462 bool IsAfterLegalTypes,
6463 SmallVectorImpl<SDNode *> &Created) const {
6464 SDLoc dl(N);
6465 EVT VT = N->getValueType(0);
6466 EVT SVT = VT.getScalarType();
6467 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6468 EVT ShSVT = ShVT.getScalarType();
6469 unsigned EltBits = VT.getScalarSizeInBits();
 // MulVT is the promoted type used for the widened multiply when VT itself
 // is illegal; it stays uninitialized (and unused) for legal VT.
6470 EVT MulVT;
6471
6472 // Check to see if we can do this.
6473 // FIXME: We should be more aggressive here.
6474 if (!isTypeLegal(VT)) {
6475 // Limit this to simple scalars for now.
6476 if (VT.isVector() || !VT.isSimple())
6477 return SDValue();
6478
6479 // If this type will be promoted to a large enough type with a legal
6480 // multiply operation, we can go ahead and do this transform.
6482 return SDValue();
6483
6484 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6485 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6486 !isOperationLegal(ISD::MUL, MulVT))
6487 return SDValue();
6488 }
6489
6490 // If the udiv has an 'exact' bit we can use a simpler lowering.
6491 if (N->getFlags().hasExact())
6492 return BuildExactUDIV(*this, N, dl, DAG, Created);
6493
6494 SDValue N0 = N->getOperand(0);
6495 SDValue N1 = N->getOperand(1);
6496
6497 // Try to use leading zeros of the dividend to reduce the multiplier and
6498 // avoid expensive fixups.
6499 unsigned KnownLeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6500
6501 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6502 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6503
 // Per-element builder: compute the unsigned magic-number parameters for
 // each constant divisor; division-by-1 lanes get UNDEF placeholders and
 // are fixed up by the final select.
6504 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6505 if (C->isZero())
6506 return false;
6507 const APInt& Divisor = C->getAPIntValue();
6508
6509 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6510
6511 // Magic algorithm doesn't work for division by 1. We need to emit a select
6512 // at the end.
6513 if (Divisor.isOne()) {
6514 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6515 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6516 } else {
 // NOTE(review): 'magics' here presumably comes from
 // UnsignedDivisionByConstantInfo::get on the preceding line — confirm
 // against the full source.
6519 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
6520
6521 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6522
6523 assert(magics.PreShift < Divisor.getBitWidth() &&
6524 "We shouldn't generate an undefined shift!");
6525 assert(magics.PostShift < Divisor.getBitWidth() &&
6526 "We shouldn't generate an undefined shift!");
6527 assert((!magics.IsAdd || magics.PreShift == 0) &&
6528 "Unexpected pre-shift");
6529 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6530 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
 // NPQFactor feeds the vector NPQ path: 2^(W-1) acts as an srl-by-1 via
 // MULHU for lanes that need the add-fixup, 0 for lanes that don't.
6531 NPQFactor = DAG.getConstant(
6532 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6533 : APInt::getZero(EltBits),
6534 dl, SVT);
6535 UseNPQ |= magics.IsAdd;
6536 UsePreShift |= magics.PreShift != 0;
6537 UsePostShift |= magics.PostShift != 0;
6538 }
6539
6540 PreShifts.push_back(PreShift);
6541 MagicFactors.push_back(MagicFactor);
6542 NPQFactors.push_back(NPQFactor);
6543 PostShifts.push_back(PostShift);
6544 return true;
6545 };
6546
6547 // Collect the shifts/magic values from each element.
6548 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6549 return SDValue();
6550
 // Materialize the collected constants with the same shape as the divisor.
6551 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6552 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6553 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6554 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6555 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6556 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6557 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6558 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6559 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6560 "Expected matchUnaryPredicate to return one for scalable vectors");
6561 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6562 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6563 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6564 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6565 } else {
6566 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6567 PreShift = PreShifts[0];
6568 MagicFactor = MagicFactors[0];
 // NPQFactor is intentionally not set here: the scalar NPQ path below uses
 // an explicit srl-by-1 instead of the MULHU trick.
6569 PostShift = PostShifts[0];
6570 }
6571
6572 SDValue Q = N0;
6573 if (UsePreShift) {
6574 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6575 Created.push_back(Q.getNode());
6576 }
6577
6578 // FIXME: We should support doing a MUL in a wider type.
 // GetMULHU returns the high half of the unsigned product X*Y, via (in
 // order of preference): the promoted MulVT path, MULHU, UMUL_LOHI, or a
 // widened MUL plus shift; SDValue() if none is available.
6579 auto GetMULHU = [&](SDValue X, SDValue Y) {
6580 // If the type isn't legal, use a wider mul of the type calculated
6581 // earlier.
6582 if (!isTypeLegal(VT)) {
6583 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6584 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6585 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6586 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6587 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6588 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6589 }
6590
6591 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6592 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6593 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6594 SDValue LoHi =
6595 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
 // Result 1 of UMUL_LOHI is the high half of the product.
6596 return SDValue(LoHi.getNode(), 1);
6597 }
6598 // If type twice as wide legal, widen and use a mul plus a shift.
6599 unsigned Size = VT.getScalarSizeInBits();
6600 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6601 if (VT.isVector())
6602 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6604 // Some targets like AMDGPU try to go from UDIV to UDIVREM which is then
6605 // custom lowered. This is very expensive so avoid it at all costs for
6606 // constant divisors.
6607 if ((!IsAfterLegalTypes && isOperationExpand(ISD::UDIV, VT) &&
6610 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6611 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6612 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6613 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6614 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6615 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6616 }
6617 return SDValue(); // No mulhu or equivalent
6618 };
6619
6620 // Multiply the numerator (operand 0) by the magic value.
6621 Q = GetMULHU(Q, MagicFactor);
6622 if (!Q)
6623 return SDValue();
6624
6625 Created.push_back(Q.getNode());
6626
 // NPQ fixup for divisors whose magic constant required the 'add' form:
 // q = ((n - q) >> 1) + q, then the post-shift below finishes the divide.
6627 if (UseNPQ) {
6628 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6629 Created.push_back(NPQ.getNode());
6630
6631 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6632 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6633 if (VT.isVector())
6634 NPQ = GetMULHU(NPQ, NPQFactor);
6635 else
6636 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6637
6638 Created.push_back(NPQ.getNode());
6639
6640 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6641 Created.push_back(Q.getNode());
6642 }
6643
6644 if (UsePostShift) {
6645 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6646 Created.push_back(Q.getNode());
6647 }
6648
6649 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6650
 // Division-by-1 lanes bypass the magic sequence: select N0 directly.
6651 SDValue One = DAG.getConstant(1, dl, VT);
6652 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6653 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6654}
6655
6656/// If all values in Values that *don't* match the predicate are same 'splat'
6657/// value, then replace all values with that splat value.
6658/// Else, if AlternativeReplacement was provided, then replace all values that
6659/// do match predicate with AlternativeReplacement value.
 /// Values is mutated in place; the function is a no-op when no baseline
 /// splat exists and no alternative replacement was supplied.
6660static void
6662 std::function<bool(SDValue)> Predicate,
6663 SDValue AlternativeReplacement = SDValue()) {
6664 SDValue Replacement;
6665 // Is there a value for which the Predicate does *NOT* match? What is it?
6666 auto SplatValue = llvm::find_if_not(Values, Predicate);
6667 if (SplatValue != Values.end()) {
6668 // Does Values consist only of SplatValue's and values matching Predicate?
6669 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6670 return Value == *SplatValue || Predicate(Value);
6671 })) // Then we shall replace values matching predicate with SplatValue.
6672 Replacement = *SplatValue;
6673 }
6674 if (!Replacement) {
6675 // Oops, we did not find the "baseline" splat value.
6676 if (!AlternativeReplacement)
6677 return; // Nothing to do.
6678 // Let's replace with provided value then.
6679 Replacement = AlternativeReplacement;
6680 }
 // Overwrite every predicate-matching element with the chosen replacement.
6681 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6682}
6683
6684/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6685/// where the divisor is constant and the comparison target is zero,
6686/// return a DAG expression that will generate the same comparison result
6687/// using only multiplications, additions and shifts/rotations.
6688/// Ref: "Hacker's Delight" 10-17.
 /// Thin public wrapper: delegates the rewrite to prepareUREMEqFold and, on
 /// success, enqueues every node it built on the DAG combiner worklist.
6689SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6690 SDValue CompTargetNode,
6692 DAGCombinerInfo &DCI,
6693 const SDLoc &DL) const {
6695 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6696 DCI, DL, Built)) {
 // Make sure the combiner revisits everything the fold created.
6697 for (SDNode *N : Built)
6698 DCI.AddToWorklist(N);
6699 return Folded;
6700 }
6701
6702 return SDValue();
6703}
6704
 // Worker for buildUREMEqFold: performs the actual
 // (urem N, D) ==/!= C -> mul+rotate+setcc rewrite, appending every node it
 // creates to 'Created'. Returns SDValue() when the fold is unprofitable or
 // a required operation is unavailable.
6705SDValue
6706TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6707 SDValue CompTargetNode, ISD::CondCode Cond,
6708 DAGCombinerInfo &DCI, const SDLoc &DL,
6709 SmallVectorImpl<SDNode *> &Created) const {
6710 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6711 // - D must be constant, with D = D0 * 2^K where D0 is odd
6712 // - P is the multiplicative inverse of D0 modulo 2^W
6713 // - Q = floor(((2^W) - 1) / D)
6714 // where W is the width of the common type of N and D.
6715 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6716 "Only applicable for (in)equality comparisons.");
6717
6718 SelectionDAG &DAG = DCI.DAG;
6719
6720 EVT VT = REMNode.getValueType();
6721 EVT SVT = VT.getScalarType();
6722 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6723 EVT ShSVT = ShVT.getScalarType();
6724
6725 // If MUL is unavailable, we cannot proceed in any case.
6726 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6727 return SDValue();
6728
 // Per-lane profitability/legality bookkeeping filled in by the pattern
 // builder below.
6729 bool ComparingWithAllZeros = true;
6730 bool AllComparisonsWithNonZerosAreTautological = true;
6731 bool HadTautologicalLanes = false;
6732 bool AllLanesAreTautological = true;
6733 bool HadEvenDivisor = false;
6734 bool AllDivisorsArePowerOfTwo = true;
6735 bool HadTautologicalInvertedLanes = false;
 // NOTE(review): IAmts appears unused in the visible code — possibly
 // vestigial.
6736 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6737
 // Per-lane builder: derive (P, K, Q) for one (divisor, compare-target)
 // pair, or flag the lane as tautological.
6738 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6739 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6740 if (CDiv->isZero())
6741 return false;
6742
6743 const APInt &D = CDiv->getAPIntValue();
6744 const APInt &Cmp = CCmp->getAPIntValue();
6745
6746 ComparingWithAllZeros &= Cmp.isZero();
6747
6748 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6749 // if C2 is not less than C1, the comparison is always false.
6750 // But we will only be able to produce the comparison that will give the
6751 // opposive tautological answer. So this lane would need to be fixed up.
6752 bool TautologicalInvertedLane = D.ule(Cmp);
6753 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6754
6755 // If all lanes are tautological (either all divisors are ones, or divisor
6756 // is not greater than the constant we are comparing with),
6757 // we will prefer to avoid the fold.
6758 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6759 HadTautologicalLanes |= TautologicalLane;
6760 AllLanesAreTautological &= TautologicalLane;
6761
6762 // If we are comparing with non-zero, we need'll need to subtract said
6763 // comparison value from the LHS. But there is no point in doing that if
6764 // every lane where we are comparing with non-zero is tautological..
6765 if (!Cmp.isZero())
6766 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6767
6768 // Decompose D into D0 * 2^K
6769 unsigned K = D.countr_zero();
6770 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6771 APInt D0 = D.lshr(K);
6772
6773 // D is even if it has trailing zeros.
6774 HadEvenDivisor |= (K != 0);
6775 // D is a power-of-two if D0 is one.
6776 // If all divisors are power-of-two, we will prefer to avoid the fold.
6777 AllDivisorsArePowerOfTwo &= D0.isOne();
6778
6779 // P = inv(D0, 2^W)
6780 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6781 unsigned W = D.getBitWidth();
6783 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6784
6785 // Q = floor((2^W - 1) u/ D)
6786 // R = ((2^W - 1) u% D)
6787 APInt Q, R;
6789
6790 // If we are comparing with zero, then that comparison constant is okay,
6791 // else it may need to be one less than that.
6792 if (Cmp.ugt(R))
6793 Q -= 1;
6794
6796 "We are expecting that K is always less than all-ones for ShSVT");
6797
6798 // If the lane is tautological the result can be constant-folded.
6799 if (TautologicalLane) {
6800 // Set P and K amount to a bogus values so we can try to splat them.
6801 P = 0;
6802 K = -1;
6803 // And ensure that comparison constant is tautological,
6804 // it will always compare true/false.
6805 Q = -1;
6806 }
6807
6808 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6809 KAmts.push_back(
6810 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6811 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6812 return true;
6813 };
6814
6815 SDValue N = REMNode.getOperand(0);
6816 SDValue D = REMNode.getOperand(1);
6817
6818 // Collect the values from each element.
6819 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6820 return SDValue();
6821
6822 // If all lanes are tautological, the result can be constant-folded.
6823 if (AllLanesAreTautological)
6824 return SDValue();
6825
6826 // If this is a urem by a powers-of-two, avoid the fold since it can be
6827 // best implemented as a bit test.
6828 if (AllDivisorsArePowerOfTwo)
6829 return SDValue();
6830
 // Materialize P/K/Q with the same shape as the divisor operand.
6831 SDValue PVal, KVal, QVal;
6832 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6833 if (HadTautologicalLanes) {
6834 // Try to turn PAmts into a splat, since we don't care about the values
6835 // that are currently '0'. If we can't, just keep '0'`s.
6837 // Try to turn KAmts into a splat, since we don't care about the values
6838 // that are currently '-1'. If we can't, change them to '0'`s.
6840 DAG.getConstant(0, DL, ShSVT));
6841 }
6842
6843 PVal = DAG.getBuildVector(VT, DL, PAmts);
6844 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6845 QVal = DAG.getBuildVector(VT, DL, QAmts);
6846 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6847 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6848 "Expected matchBinaryPredicate to return one element for "
6849 "SPLAT_VECTORs");
6850 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6851 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6852 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6853 } else {
6854 PVal = PAmts[0];
6855 KVal = KAmts[0];
6856 QVal = QAmts[0];
6857 }
6858
 // When comparing with a non-zero constant, fold it into the LHS first:
 // (x - C2) u% D == 0 has the same truth value as x u% D == C2.
6859 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6860 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6861 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6862 assert(CompTargetNode.getValueType() == N.getValueType() &&
6863 "Expecting that the types on LHS and RHS of comparisons match.");
6864 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6865 }
6866
6867 // (mul N, P)
6868 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6869 Created.push_back(Op0.getNode());
6870
6871 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6872 // divisors as a performance improvement, since rotating by 0 is a no-op.
6873 if (HadEvenDivisor) {
6874 // We need ROTR to do this.
6875 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6876 return SDValue();
6877 // UREM: (rotr (mul N, P), K)
6878 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6879 Created.push_back(Op0.getNode());
6880 }
6881
6882 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6883 SDValue NewCC =
6884 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6886 if (!HadTautologicalInvertedLanes)
6887 return NewCC;
6888
6889 // If any lanes previously compared always-false, the NewCC will give
6890 // always-true result for them, so we need to fixup those lanes.
6891 // Or the other way around for inequality predicate.
6892 assert(VT.isVector() && "Can/should only get here for vectors.");
6893 Created.push_back(NewCC.getNode());
6894
6895 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6896 // if C2 is not less than C1, the comparison is always false.
6897 // But we have produced the comparison that will give the
6898 // opposive tautological answer. So these lanes would need to be fixed up.
6899 SDValue TautologicalInvertedChannels =
6900 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6901 Created.push_back(TautologicalInvertedChannels.getNode());
6902
6903 // NOTE: we avoid letting illegal types through even if we're before legalize
6904 // ops – legalization has a hard time producing good code for this.
6905 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6906 // If we have a vector select, let's replace the comparison results in the
6907 // affected lanes with the correct tautological result.
6908 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6909 DL, SETCCVT, SETCCVT);
6910 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6911 Replacement, NewCC);
6912 }
6913
6914 // Else, we can just invert the comparison result in the appropriate lanes.
6915 //
6916 // NOTE: see the note above VSELECT above.
6917 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6918 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6919 TautologicalInvertedChannels);
6920
6921 return SDValue(); // Don't know how to lower.
6922}
6923
6924/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6925/// where the divisor is constant and the comparison target is zero,
6926/// return a DAG expression that will generate the same comparison result
6927/// using only multiplications, additions and shifts/rotations.
6928/// Ref: "Hacker's Delight" 10-17.
 /// Thin public wrapper: delegates the rewrite to prepareSREMEqFold and, on
 /// success, enqueues every node it built on the DAG combiner worklist.
6929SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6930 SDValue CompTargetNode,
6932 DAGCombinerInfo &DCI,
6933 const SDLoc &DL) const {
6935 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6936 DCI, DL, Built)) {
 // Sanity-bound on how many nodes the SREM fold may create.
6937 assert(Built.size() <= 7 && "Max size prediction failed.");
6938 for (SDNode *N : Built)
6939 DCI.AddToWorklist(N);
6940 return Folded;
6941 }
6942
6943 return SDValue();
6944}
6945
6946SDValue
6947TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6948 SDValue CompTargetNode, ISD::CondCode Cond,
6949 DAGCombinerInfo &DCI, const SDLoc &DL,
6950 SmallVectorImpl<SDNode *> &Created) const {
6951 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6952 // Fold:
6953 // (seteq/ne (srem N, D), 0)
6954 // To:
6955 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6956 //
6957 // - D must be constant, with D = D0 * 2^K where D0 is odd
6958 // - P is the multiplicative inverse of D0 modulo 2^W
6959 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6960 // - Q = floor((2 * A) / (2^K))
6961 // where W is the width of the common type of N and D.
6962 //
6963 // When D is a power of two (and thus D0 is 1), the normal
6964 // formula for A and Q don't apply, because the derivation
6965 // depends on D not dividing 2^(W-1), and thus theorem ZRS
6966 // does not apply. This specifically fails when N = INT_MIN.
6967 //
6968 // Instead, for power-of-two D, we use:
6969 // - A = 2^(W-1)
6970 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6971 // - Q = 2^(W-K) - 1
6972 // |-> Test that the top K bits are zero after rotation
6973 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6974 "Only applicable for (in)equality comparisons.");
6975
6976 SelectionDAG &DAG = DCI.DAG;
6977
6978 EVT VT = REMNode.getValueType();
6979 EVT SVT = VT.getScalarType();
6980 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6981 EVT ShSVT = ShVT.getScalarType();
6982
6983 // If we are after ops legalization, and MUL is unavailable, we can not
6984 // proceed.
6985 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6986 return SDValue();
6987
6988 // TODO: Could support comparing with non-zero too.
6989 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6990 if (!CompTarget || !CompTarget->isZero())
6991 return SDValue();
6992
6993 bool HadIntMinDivisor = false;
6994 bool HadOneDivisor = false;
6995 bool AllDivisorsAreOnes = true;
6996 bool HadEvenDivisor = false;
6997 bool NeedToApplyOffset = false;
6998 bool AllDivisorsArePowerOfTwo = true;
6999 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
7000
7001 auto BuildSREMPattern = [&](ConstantSDNode *C) {
7002 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
7003 if (C->isZero())
7004 return false;
7005
7006 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
7007
7008 // WARNING: this fold is only valid for positive divisors!
7009 APInt D = C->getAPIntValue();
7010 if (D.isNegative())
7011 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
7012
7013 HadIntMinDivisor |= D.isMinSignedValue();
7014
7015 // If all divisors are ones, we will prefer to avoid the fold.
7016 HadOneDivisor |= D.isOne();
7017 AllDivisorsAreOnes &= D.isOne();
7018
7019 // Decompose D into D0 * 2^K
7020 unsigned K = D.countr_zero();
7021 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
7022 APInt D0 = D.lshr(K);
7023
7024 if (!D.isMinSignedValue()) {
7025 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
7026 // we don't care about this lane in this fold, we'll special-handle it.
7027 HadEvenDivisor |= (K != 0);
7028 }
7029
7030 // D is a power-of-two if D0 is one. This includes INT_MIN.
7031 // If all divisors are power-of-two, we will prefer to avoid the fold.
7032 AllDivisorsArePowerOfTwo &= D0.isOne();
7033
7034 // P = inv(D0, 2^W)
7035 // 2^W requires W + 1 bits, so we have to extend and then truncate.
7036 unsigned W = D.getBitWidth();
7038 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
7039
7040 // A = floor((2^(W - 1) - 1) / D0) & -2^K
7042 A.clearLowBits(K);
7043
7044 if (!D.isMinSignedValue()) {
7045 // If divisor INT_MIN, then we don't care about this lane in this fold,
7046 // we'll special-handle it.
7047 NeedToApplyOffset |= A != 0;
7048 }
7049
7050 // Q = floor((2 * A) / (2^K))
7051 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
7052
7054 "We are expecting that A is always less than all-ones for SVT");
7056 "We are expecting that K is always less than all-ones for ShSVT");
7057
7058 // If D was a power of two, apply the alternate constant derivation.
7059 if (D0.isOne()) {
7060 // A = 2^(W-1)
7062 // - Q = 2^(W-K) - 1
7063 Q = APInt::getAllOnes(W - K).zext(W);
7064 }
7065
7066 // If the divisor is 1 the result can be constant-folded. Likewise, we
7067 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
7068 if (D.isOne()) {
7069 // Set P, A and K to a bogus values so we can try to splat them.
7070 P = 0;
7071 A = -1;
7072 K = -1;
7073
7074 // x ?% 1 == 0 <--> true <--> x u<= -1
7075 Q = -1;
7076 }
7077
7078 PAmts.push_back(DAG.getConstant(P, DL, SVT));
7079 AAmts.push_back(DAG.getConstant(A, DL, SVT));
7080 KAmts.push_back(
7081 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
7082 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
7083 return true;
7084 };
7085
7086 SDValue N = REMNode.getOperand(0);
7087 SDValue D = REMNode.getOperand(1);
7088
7089 // Collect the values from each element.
7090 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
7091 return SDValue();
7092
7093 // If this is a srem by a one, avoid the fold since it can be constant-folded.
7094 if (AllDivisorsAreOnes)
7095 return SDValue();
7096
7097 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
7098 // since it can be best implemented as a bit test.
7099 if (AllDivisorsArePowerOfTwo)
7100 return SDValue();
7101
7102 SDValue PVal, AVal, KVal, QVal;
7103 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7104 if (HadOneDivisor) {
7105 // Try to turn PAmts into a splat, since we don't care about the values
7106 // that are currently '0'. If we can't, just keep '0'`s.
7108 // Try to turn AAmts into a splat, since we don't care about the
7109 // values that are currently '-1'. If we can't, change them to '0'`s.
7111 DAG.getConstant(0, DL, SVT));
7112 // Try to turn KAmts into a splat, since we don't care about the values
7113 // that are currently '-1'. If we can't, change them to '0'`s.
7115 DAG.getConstant(0, DL, ShSVT));
7116 }
7117
7118 PVal = DAG.getBuildVector(VT, DL, PAmts);
7119 AVal = DAG.getBuildVector(VT, DL, AAmts);
7120 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7121 QVal = DAG.getBuildVector(VT, DL, QAmts);
7122 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7123 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7124 QAmts.size() == 1 &&
7125 "Expected matchUnaryPredicate to return one element for scalable "
7126 "vectors");
7127 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7128 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7129 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7130 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7131 } else {
7132 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7133 PVal = PAmts[0];
7134 AVal = AAmts[0];
7135 KVal = KAmts[0];
7136 QVal = QAmts[0];
7137 }
7138
7139 // (mul N, P)
7140 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7141 Created.push_back(Op0.getNode());
7142
7143 if (NeedToApplyOffset) {
7144 // We need ADD to do this.
7145 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7146 return SDValue();
7147
7148 // (add (mul N, P), A)
7149 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7150 Created.push_back(Op0.getNode());
7151 }
7152
7153 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7154 // divisors as a performance improvement, since rotating by 0 is a no-op.
7155 if (HadEvenDivisor) {
7156 // We need ROTR to do this.
7157 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7158 return SDValue();
7159 // SREM: (rotr (add (mul N, P), A), K)
7160 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7161 Created.push_back(Op0.getNode());
7162 }
7163
7164 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7165 SDValue Fold =
7166 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7168
7169 // If we didn't have lanes with INT_MIN divisor, then we're done.
7170 if (!HadIntMinDivisor)
7171 return Fold;
7172
7173 // That fold is only valid for positive divisors. Which effectively means,
7174 // it is invalid for INT_MIN divisors. So if we have such a lane,
7175 // we must fix-up results for said lanes.
7176 assert(VT.isVector() && "Can/should only get here for vectors.");
7177
7178 // NOTE: we avoid letting illegal types through even if we're before legalize
7179 // ops – legalization has a hard time producing good code for the code that
7180 // follows.
7181 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7185 return SDValue();
7186
7187 Created.push_back(Fold.getNode());
7188
7189 SDValue IntMin = DAG.getConstant(
7191 SDValue IntMax = DAG.getConstant(
7193 SDValue Zero =
7195
7196 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7197 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7198 Created.push_back(DivisorIsIntMin.getNode());
7199
7200 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7201 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7202 Created.push_back(Masked.getNode());
7203 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7204 Created.push_back(MaskedIsZero.getNode());
7205
7206 // To produce final result we need to blend 2 vectors: 'SetCC' and
7207 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7208 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7209 // constant-folded, select can get lowered to a shuffle with constant mask.
7210 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7211 MaskedIsZero, Fold);
7212
7213 return Blended;
7214}
7215
// NOTE(review): the signature line of this function was lost in extraction.
// From the diagnostic text below, this is the validity check for the operand
// of a __builtin_return_address lowering: it returns true (after emitting a
// module-level error) when the operand is not a compile-time constant, and
// false when the operand is fine.
 7218 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
   // Report via the LLVMContext so the error is surfaced to the front end
   // rather than asserting inside instruction selection.
 7219 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
 7220 "be a constant integer");
 7221 return true;
 7222 }
 7223
 7224 return false;
 7225}
7226
// NOTE(review): the first half of the signature was lost in extraction; the
// visible parameters are an SDValue `Op` (the input being tested), a
// SelectionDAG `DAG`, and the denormal `Mode`. Builds a boolean SDValue that
// is true when `Op` would make a sqrt estimate invalid: either exactly zero
// (when denormal inputs are flushed) or any denormal input otherwise.
 7228 const DenormalMode &Mode) const {
 7229 SDLoc DL(Op);
 7230 EVT VT = Op.getValueType();
   // Result type of the comparison is whatever setcc produces for VT.
 7231 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 7232 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
 7233
 7234 // This is specifically a check for the handling of denormal inputs, not the
 7235 // result.
 7236 if (Mode.Input == DenormalMode::PreserveSign ||
 7237 Mode.Input == DenormalMode::PositiveZero) {
   // Denormal inputs are treated as zero, so only exact zero is special.
 7238 // Test = X == 0.0
 7239 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
 7240 }
 7241
 7242 // Testing it with denormal inputs to avoid wrong estimate.
 7243 //
   // Denormals are honored: flag every value with magnitude below the
   // smallest normalized number for this FP semantics.
 7244 // Test = fabs(X) < SmallestNormal
 7245 const fltSemantics &FltSem = VT.getFltSemantics();
 7246 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
 7247 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
 7248 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
 7249 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
 7250}
7251
// NOTE(review): this is TargetLowering::getNegatedExpression; the first line
// of the signature and several local declarations (e.g. the NegatibleCost
// CostX/CostY/CostZ/CostLHS/CostRHS locals and some call-continuation lines)
// were lost in extraction of this listing — the visible uses below imply
// them. Returns a negated form of Op if one can be built cheaply, reporting
// the cost through `Cost`; returns an empty SDValue otherwise.
 7253 bool LegalOps, bool OptForSize,
 7255 unsigned Depth) const {
 7256 // fneg is removable even if it has multiple uses.
 7257 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
 7259 return Op.getOperand(0);
 7260 }
 7261
 7262 // Don't recurse exponentially.
 7264 return SDValue();
 7265
 7266 // Pre-increment recursion depth for use in recursive calls.
 7267 ++Depth;
 7268 const SDNodeFlags Flags = Op->getFlags();
 7269 const TargetOptions &Options = DAG.getTarget().Options;
 7270 EVT VT = Op.getValueType();
 7271 unsigned Opcode = Op.getOpcode();
 7272
 7273 // Don't allow anything with multiple uses unless we know it is free.
 7274 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
 7275 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
 7276 isFPExtFree(VT, Op.getOperand(0).getValueType());
 7277 if (!IsFreeExtend)
 7278 return SDValue();
 7279 }
 7280
   // Helper: drop a speculatively-created node that ended up unused, so we
   // don't leave dead nodes in the DAG on the non-chosen path.
 7281 auto RemoveDeadNode = [&](SDValue N) {
 7282 if (N && N.getNode()->use_empty())
 7283 DAG.RemoveDeadNode(N.getNode());
 7284 };
 7285
 7286 SDLoc DL(Op);
 7287
 7288 // Because getNegatedExpression can delete nodes we need a handle to keep
 7289 // temporary nodes alive in case the recursion manages to create an identical
 7290 // node.
 7291 std::list<HandleSDNode> Handles;
 7292
 7293 switch (Opcode) {
 7294 case ISD::ConstantFP: {
 7295 // Don't invert constant FP values after legalization unless the target says
 7296 // the negated constant is legal.
 7297 bool IsOpLegal =
 7299 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
 7300 OptForSize);
 7301
 7302 if (LegalOps && !IsOpLegal)
 7303 break;
 7304
 7305 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 7306 V.changeSign();
 7307 SDValue CFP = DAG.getConstantFP(V, DL, VT);
 7308
 7309 // If we already have the use of the negated floating constant, it is free
 7310 // to negate it even it has multiple uses.
 7311 if (!Op.hasOneUse() && CFP.use_empty())
 7312 break;
 7314 return CFP;
 7315 }
 7316 case ISD::BUILD_VECTOR: {
 7317 // Only permit BUILD_VECTOR of constants.
 7318 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
 7319 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
 7320 }))
 7321 break;
 7322
 7323 bool IsOpLegal =
 7326 llvm::all_of(Op->op_values(), [&](SDValue N) {
 7327 return N.isUndef() ||
 7328 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
 7329 OptForSize);
 7330 });
 7331
 7332 if (LegalOps && !IsOpLegal)
 7333 break;
 7334
   // Rebuild the vector with every defined lane sign-flipped; undef lanes
   // are carried through unchanged.
 7336 for (SDValue C : Op->op_values()) {
 7337 if (C.isUndef()) {
 7338 Ops.push_back(C);
 7339 continue;
 7340 }
 7341 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
 7342 V.changeSign();
 7343 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
 7344 }
 7346 return DAG.getBuildVector(VT, DL, Ops);
 7347 }
 7348 case ISD::FADD: {
 7349 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 7350 break;
 7351
 7352 // After operation legalization, it might not be legal to create new FSUBs.
 7353 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
 7354 break;
 7355 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
 7356
 7357 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
 7359 SDValue NegX =
 7360 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
 7361 // Prevent this node from being deleted by the next call.
 7362 if (NegX)
 7363 Handles.emplace_back(NegX);
 7364
 7365 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
 7367 SDValue NegY =
 7368 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 7369
 7370 // We're done with the handles.
 7371 Handles.clear();
 7372
 7373 // Negate the X if its cost is less or equal than Y.
 7374 if (NegX && (CostX <= CostY)) {
 7375 Cost = CostX;
 7376 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
 7377 if (NegY != N)
 7378 RemoveDeadNode(NegY);
 7379 return N;
 7380 }
 7381
 7382 // Negate the Y if it is not expensive.
 7383 if (NegY) {
 7384 Cost = CostY;
 7385 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
 7386 if (NegX != N)
 7387 RemoveDeadNode(NegX);
 7388 return N;
 7389 }
 7390 break;
 7391 }
 7392 case ISD::FSUB: {
 7393 // We can't turn -(A-B) into B-A when we honor signed zeros.
 7394 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 7395 break;
 7396
 7397 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
 7398 // fold (fneg (fsub 0, Y)) -> Y
 7399 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
 7400 if (C->isZero()) {
 7402 return Y;
 7403 }
 7404
 7405 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
 7407 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
 7408 }
 7409 case ISD::FMUL:
 7410 case ISD::FDIV: {
   // For mul/div, negating either one operand negates the whole result.
 7411 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
 7412
 7413 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 7415 SDValue NegX =
 7416 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
 7417 // Prevent this node from being deleted by the next call.
 7418 if (NegX)
 7419 Handles.emplace_back(NegX);
 7420
 7421 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 7423 SDValue NegY =
 7424 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 7425
 7426 // We're done with the handles.
 7427 Handles.clear();
 7428
 7429 // Negate the X if its cost is less or equal than Y.
 7430 if (NegX && (CostX <= CostY)) {
 7431 Cost = CostX;
 7432 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
 7433 if (NegY != N)
 7434 RemoveDeadNode(NegY);
 7435 return N;
 7436 }
 7437
 7438 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
 7439 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
 7440 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
 7441 break;
 7442
 7443 // Negate the Y if it is not expensive.
 7444 if (NegY) {
 7445 Cost = CostY;
 7446 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
 7447 if (NegX != N)
 7448 RemoveDeadNode(NegX);
 7449 return N;
 7450 }
 7451 break;
 7452 }
 7453 case ISD::FMA:
 7454 case ISD::FMAD: {
 7455 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 7456 break;
 7457
   // -(X*Y+Z) needs Z negated unconditionally, plus one of X or Y.
 7458 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
 7460 SDValue NegZ =
 7461 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
 7462 // Give up if fail to negate the Z.
 7463 if (!NegZ)
 7464 break;
 7465
 7466 // Prevent this node from being deleted by the next two calls.
 7467 Handles.emplace_back(NegZ);
 7468
 7469 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
 7471 SDValue NegX =
 7472 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
 7473 // Prevent this node from being deleted by the next call.
 7474 if (NegX)
 7475 Handles.emplace_back(NegX);
 7476
 7477 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
 7479 SDValue NegY =
 7480 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
 7481
 7482 // We're done with the handles.
 7483 Handles.clear();
 7484
 7485 // Negate the X if its cost is less or equal than Y.
 7486 if (NegX && (CostX <= CostY)) {
 7487 Cost = std::min(CostX, CostZ);
 7488 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
 7489 if (NegY != N)
 7490 RemoveDeadNode(NegY);
 7491 return N;
 7492 }
 7493
 7494 // Negate the Y if it is not expensive.
 7495 if (NegY) {
 7496 Cost = std::min(CostY, CostZ);
 7497 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
 7498 if (NegX != N)
 7499 RemoveDeadNode(NegX);
 7500 return N;
 7501 }
 7502 break;
 7503 }
 7504
 7505 case ISD::FP_EXTEND:
 7506 case ISD::FSIN:
   // These are odd functions of their operand: negation commutes through.
 7507 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
 7508 OptForSize, Cost, Depth))
 7509 return DAG.getNode(Opcode, DL, VT, NegV);
 7510 break;
 7511 case ISD::FP_ROUND:
 7512 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
 7513 OptForSize, Cost, Depth))
 7514 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
 7515 break;
 7516 case ISD::SELECT:
 7517 case ISD::VSELECT: {
 7518 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
 7519 // iff at least one cost is cheaper and the other is neutral/cheaper
 7520 SDValue LHS = Op.getOperand(1);
 7522 SDValue NegLHS =
 7523 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
 7524 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
 7525 RemoveDeadNode(NegLHS);
 7526 break;
 7527 }
 7528
 7529 // Prevent this node from being deleted by the next call.
 7530 Handles.emplace_back(NegLHS);
 7531
 7532 SDValue RHS = Op.getOperand(2);
 7534 SDValue NegRHS =
 7535 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
 7536
 7537 // We're done with the handles.
 7538 Handles.clear();
 7539
 7540 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
 7541 (CostLHS != NegatibleCost::Cheaper &&
 7542 CostRHS != NegatibleCost::Cheaper)) {
 7543 RemoveDeadNode(NegLHS);
 7544 RemoveDeadNode(NegRHS);
 7545 break;
 7546 }
 7547
 7548 Cost = std::min(CostLHS, CostRHS);
 7549 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
 7550 }
 7551 }
 7552
   // No profitable negation was found for this opcode.
 7553 return SDValue();
 7554}
7555
7556//===----------------------------------------------------------------------===//
7557// Legalization Utilities
7558//===----------------------------------------------------------------------===//
7559
// Expand a wide multiply (MUL / UMUL_LOHI / SMUL_LOHI) on VT into operations
// on the half-width type HiLoVT, using whichever of MULH[SU] / [SU]MUL_LOHI
// the target supports (or unconditionally, per `Kind`). Results are appended
// to `Result` as half-width pieces (lo first). Callers may pre-supply the
// split operand halves LL/LH/RL/RH; otherwise they are derived from LHS/RHS.
// Returns false if no supported expansion exists.
// NOTE(review): several continuation lines of the signature and of the
// Has*/legality conditions (original lines 7562, 7570, 7572, 7574, 7576,
// 7607, 7647-7648, 7695) were lost in extraction of this listing.
7560bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
 7561 SDValue LHS, SDValue RHS,
 7563 EVT HiLoVT, SelectionDAG &DAG,
 7564 MulExpansionKind Kind, SDValue LL,
 7565 SDValue LH, SDValue RL, SDValue RH) const {
 7566 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
 7567 Opcode == ISD::SMUL_LOHI);
 7568
   // Which half-width multiply flavors may we emit? "Always" forces all of
   // them; otherwise the (elided) legality checks on HiLoVT decide.
 7569 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
 7571 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
 7573 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
 7575 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
 7577
 7578 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
 7579 return false;
 7580
 7581 unsigned OuterBitSize = VT.getScalarSizeInBits();
 7582 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
 7583
 7584 // LL, LH, RL, and RH must be either all NULL or all set to a value.
 7585 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
 7586 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
 7587
 7588 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
   // Emit a half-width multiply producing both halves of the product, via
   // [SU]MUL_LOHI when available, else MUL + MULH[SU].
 7589 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
 7590 bool Signed) -> bool {
 7591 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
 7592 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
 7593 Hi = SDValue(Lo.getNode(), 1);
 7594 return true;
 7595 }
 7596 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
 7597 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
 7598 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
 7599 return true;
 7600 }
 7601 return false;
 7602 };
 7603
 7604 SDValue Lo, Hi;
 7605
   // Derive the low halves by truncation if the caller didn't supply them
   // (the guarding legality condition line was elided).
 7606 if (!LL.getNode() && !RL.getNode() &&
 7608 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
 7609 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
 7610 }
 7611
 7612 if (!LL.getNode())
 7613 return false;
 7614
   // Fast path: if both operands fit in the low half (high bits known zero),
   // one unsigned half-multiply is the whole answer.
 7615 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
 7616 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
 7617 DAG.MaskedValueIsZero(RHS, HighMask)) {
 7618 // The inputs are both zero-extended.
 7619 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
 7620 Result.push_back(Lo);
 7621 Result.push_back(Hi);
 7622 if (Opcode != ISD::MUL) {
 7623 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
 7624 Result.push_back(Zero);
 7625 Result.push_back(Zero);
 7626 }
 7627 return true;
 7628 }
 7629 }
 7630
 7631 if (!VT.isVector() && Opcode == ISD::MUL &&
 7632 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
 7633 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
 7634 // The input values are both sign-extended.
 7635 // TODO non-MUL case?
 7636 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
 7637 Result.push_back(Lo);
 7638 Result.push_back(Hi);
 7639 return true;
 7640 }
 7641 }
 7642
 7643 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
 7644 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
 7645
   // Derive the high halves by shift+truncate if not supplied (guard
   // condition lines elided).
 7646 if (!LH.getNode() && !RH.getNode() &&
 7649 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
 7650 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
 7651 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
 7652 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
 7653 }
 7654
 7655 if (!LH.getNode())
 7656 return false;
 7657
 7658 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
 7659 return false;
 7660
 7661 Result.push_back(Lo);
 7662
   // Plain MUL only needs the low OuterBitSize bits: Lo plus the three
   // partial products that land in the high half.
 7663 if (Opcode == ISD::MUL) {
 7664 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
 7665 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
 7666 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
 7667 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
 7668 Result.push_back(Hi);
 7669 return true;
 7670 }
 7671
 7672 // Compute the full width result.
 7673 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
 7674 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
 7675 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
 7676 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
 7677 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
 7678 };
 7679
 7680 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
 7681 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
 7682 return false;
 7683
 7684 // This is effectively the add part of a multiply-add of half-sized operands,
 7685 // so it cannot overflow.
 7686 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
 7687
 7688 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
 7689 return false;
 7690
 7691 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
 7692 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 7693
   // Prefer the legacy glued carry chain (ADDC/ADDE) when legal; otherwise
   // use UADDO_CARRY with an explicit boolean carry (condition line elided).
 7694 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
 7696 if (UseGlue)
 7697 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
 7698 Merge(Lo, Hi));
 7699 else
 7700 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
 7701 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
 7702
 7703 SDValue Carry = Next.getValue(1);
 7704 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
 7705 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
 7706
 7707 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
 7708 return false;
 7709
 7710 if (UseGlue)
 7711 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
 7712 Carry);
 7713 else
 7714 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
 7715 Zero, Carry);
 7716
 7717 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
 7718
   // For the signed variant, correct the high part when either operand was
   // negative (its high half sign bit set).
 7719 if (Opcode == ISD::SMUL_LOHI) {
 7720 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
 7721 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
 7722 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
 7723
 7724 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
 7725 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
 7726 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
 7727 }
 7728
 7729 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
 7730 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
 7731 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
 7732 return true;
 7733}
7734
// NOTE(review): the first line of the signature and the declaration of the
// local `Result` vector (original line 7739) were lost in extraction. This
// is the convenience wrapper that forwards to expandMUL_LOHI and, on
// success, unpacks the two half-width results into Lo and Hi.
 7736 SelectionDAG &DAG, MulExpansionKind Kind,
 7737 SDValue LL, SDValue LH, SDValue RL,
 7738 SDValue RH) const {
 7740 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
 7741 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
 7742 DAG, Kind, LL, LH, RL, RH);
 7743 if (Ok) {
   // A plain MUL expansion produces exactly two half-width pieces.
 7744 assert(Result.size() == 2);
 7745 Lo = Result[0];
 7746 Hi = Result[1];
 7747 }
 7748 return Ok;
 7749}
7750
7751// Optimize unsigned division or remainder by constants for types twice as large
7752// as a legal VT.
7753//
7754// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7755// can be computed
7756// as:
7757// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7758// Remainder = Sum % Constant
7759// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7760//
7761// For division, we can compute the remainder using the algorithm described
7762// above, subtract it from the dividend to get an exact multiple of Constant.
7763// Then multiply that exact multiply by the multiplicative inverse modulo
7764// (1 << (BitWidth / 2)) to get the quotient.
7765
7766// If Constant is even, we can shift right the dividend and the divisor by the
7767// number of trailing zeros in Constant before applying the remainder algorithm.
7768// If we're after the quotient, we can subtract this value from the shifted
7769// dividend and multiply by the multiplicative inverse of the shifted divisor.
7770// If we want the remainder, we shift the value left by the number of trailing
7771// zeros and add the bits that were shifted out of the dividend.
// NOTE(review): the first line(s) of the signature were lost in extraction;
// per the comment block above, this expands unsigned div/rem by a constant
// on a type twice the width of the legal HiLoVT, using the "remainder by
// summing digits" technique. Results (half-width pieces) are appended to
// `Result` — quotient halves first for UDIV/UDIVREM, then remainder halves
// for UREM/UDIVREM. Returns false when the expansion does not apply.
 7774 EVT HiLoVT, SelectionDAG &DAG,
 7775 SDValue LL, SDValue LH) const {
 7776 unsigned Opcode = N->getOpcode();
 7777 EVT VT = N->getValueType(0);
 7778
 7779 // TODO: Support signed division/remainder.
 7780 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
 7781 return false;
 7782 assert(
 7783 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
 7784 "Unexpected opcode");
 7785
   // Only constant divisors are handled.
 7786 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
 7787 if (!CN)
 7788 return false;
 7789
 7790 APInt Divisor = CN->getAPIntValue();
 7791 unsigned BitWidth = Divisor.getBitWidth();
 7792 unsigned HBitWidth = BitWidth / 2;
 7794 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
 7795
 7796 // Divisor needs to less than (1 << HBitWidth).
 7797 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
 7798 if (Divisor.uge(HalfMaxPlus1))
 7799 return false;
 7800
 7801 // We depend on the UREM by constant optimization in DAGCombiner that requires
 7802 // high multiply.
 7803 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
 7805 return false;
 7806
 7807 // Don't expand if optimizing for size.
 7808 if (DAG.shouldOptForSize())
 7809 return false;
 7810
 7811 // Early out for 0 or 1 divisors.
 7812 if (Divisor.ule(1))
 7813 return false;
 7814
 7815 // If the divisor is even, shift it until it becomes odd.
 7816 unsigned TrailingZeros = 0;
 7817 if (!Divisor[0]) {
 7818 TrailingZeros = Divisor.countr_zero();
 7819 Divisor.lshrInPlace(TrailingZeros);
 7820 }
 7821
 7822 SDLoc dl(N);
 7823 SDValue Sum;
 7824 SDValue PartialRem;
 7825
 7826 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
 7827 // then add in the carry.
 7828 // TODO: If we can't split it in half, we might be able to split into 3 or
 7829 // more pieces using a smaller bit width.
 7830 if (HalfMaxPlus1.urem(Divisor).isOne()) {
 7831 assert(!LL == !LH && "Expected both input halves or no input halves!");
 7832 if (!LL)
 7833 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
 7834
 7835 // Shift the input by the number of TrailingZeros in the divisor. The
 7836 // shifted out bits will be added to the remainder later.
 7837 if (TrailingZeros) {
 7838 // Save the shifted off bits if we need the remainder.
 7839 if (Opcode != ISD::UDIV) {
 7840 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
 7841 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
 7842 DAG.getConstant(Mask, dl, HiLoVT));
 7843 }
 7844
   // Funnel the low bits of LH into LL: a full-width logical right shift
   // expressed on the two halves.
 7845 LL = DAG.getNode(
 7846 ISD::OR, dl, HiLoVT,
 7847 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
 7848 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
 7849 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
 7850 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
 7851 HiLoVT, dl)));
 7852 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
 7853 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
 7854 }
 7855
 7856 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
 7857 EVT SetCCType =
 7858 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
 7860 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
 7861 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
 7862 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
 7863 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
 7864 } else {
 7865 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
   // Unsigned overflow happened iff the sum wrapped below either addend.
 7866 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
 7867 // If the boolean for the target is 0 or 1, we can add the setcc result
 7868 // directly.
 7869 if (getBooleanContents(HiLoVT) ==
 7871 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
 7872 else
 7873 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
 7874 DAG.getConstant(0, dl, HiLoVT));
 7875 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
 7876 }
 7877 }
 7878
 7879 // If we didn't find a sum, we can't do the expansion.
 7880 if (!Sum)
 7881 return false;
 7882
 7883 // Perform a HiLoVT urem on the Sum using truncated divisor.
 7884 SDValue RemL =
 7885 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
 7886 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
 7887 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
 7888
 7889 if (Opcode != ISD::UREM) {
 7890 // Subtract the remainder from the shifted dividend.
 7891 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
 7892 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
 7893
 7894 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
 7895
 7896 // Multiply by the multiplicative inverse of the divisor modulo
 7897 // (1 << BitWidth).
 7898 APInt MulFactor = Divisor.multiplicativeInverse();
 7899
 7900 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
 7901 DAG.getConstant(MulFactor, dl, VT));
 7902
 7903 // Split the quotient into low and high parts.
 7904 SDValue QuotL, QuotH;
 7905 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
 7906 Result.push_back(QuotL);
 7907 Result.push_back(QuotH);
 7908 }
 7909
 7910 if (Opcode != ISD::UDIV) {
 7911 // If we shifted the input, shift the remainder left and add the bits we
 7912 // shifted off the input.
 7913 if (TrailingZeros) {
 7914 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
 7915 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
 7916 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
 7917 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
 7918 }
 7919 Result.push_back(RemL);
   // The remainder always fits in the low half; high half is zero.
 7920 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
 7921 }
 7922
 7923 return true;
 7924}
7925
7926// Check that (every element of) Z is undef or not an exact multiple of BW.
7927static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7929 Z,
7930 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7931 true);
7932}
7933
// NOTE(review): the signature line was lost in extraction; from the opcode
// check below this lowers a vector-predicated funnel shift (VP_FSHL/VP_FSHR)
// into VP shift/logic ops, threading the mask and EVL operands through every
// node. Returns the expanded value.
 7935 EVT VT = Node->getValueType(0);
 7936 SDValue ShX, ShY;
 7937 SDValue ShAmt, InvShAmt;
 7938 SDValue X = Node->getOperand(0);
 7939 SDValue Y = Node->getOperand(1);
 7940 SDValue Z = Node->getOperand(2);
 7941 SDValue Mask = Node->getOperand(3);
 7942 SDValue VL = Node->getOperand(4);
 7943
 7944 unsigned BW = VT.getScalarSizeInBits();
 7945 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
 7946 SDLoc DL(SDValue(Node, 0));
 7947
 7948 EVT ShVT = Z.getValueType();
   // When no lane's shift amount can be 0 mod BW, the complementary shift
   // (BW - C) is also in range, so the simple two-shift form is safe.
 7949 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
 7950 // fshl: X << C | Y >> (BW - C)
 7951 // fshr: X << (BW - C) | Y >> C
 7952 // where C = Z % BW is not zero
 7953 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
 7954 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
 7955 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
 7956 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
 7957 VL);
 7958 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
 7959 VL);
 7960 } else {
   // Otherwise split the complementary shift into a fixed 1-bit step plus a
   // (BW - 1 - C) shift so no individual shift amount can reach BW.
 7961 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
 7962 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
 7963 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
 7964 if (isPowerOf2_32(BW)) {
 7965 // Z % BW -> Z & (BW - 1)
 7966 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
 7967 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
 7968 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
 7969 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
 7970 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
 7971 } else {
 7972 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
 7973 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
 7974 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
 7975 }
 7976
 7977 SDValue One = DAG.getConstant(1, DL, ShVT);
 7978 if (IsFSHL) {
 7979 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
 7980 SDValue ShY1 = DAG.getNode(ISD::VP_SRL, DL, VT, Y, One, Mask, VL);
 7981 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, ShY1, InvShAmt, Mask, VL);
 7982 } else {
 7983 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
 7984 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
 7985 ShY = DAG.getNode(ISD::VP_SRL, DL, VT, Y, ShAmt, Mask, VL);
 7986 }
 7987 }
   // Combine the two partial shifts into the funnel-shift result.
 7988 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
 7989}
7990
// Expand a funnel-shift node (ISD::FSHL/FSHR) into plain shift + OR
// sequences. VP opcodes are delegated to expandVPFunnelShift; illegal
// vector cases bail out with SDValue().
// NOTE(review): the signature line of this definition and part of the
// vector-legality condition (between the lines numbered 7998 and 8002)
// are missing from this extract — confirm against the full source.
 7992                                          SelectionDAG &DAG) const {
 7993  if (Node->isVPOpcode())
 7994    return expandVPFunnelShift(Node, DAG);
 7995
 7996  EVT VT = Node->getValueType(0);
 7997
 7998  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
 8002    return SDValue();
 8003
 8004  SDValue X = Node->getOperand(0);
 8005  SDValue Y = Node->getOperand(1);
 8006  SDValue Z = Node->getOperand(2);
 8007
 8008  unsigned BW = VT.getScalarSizeInBits();
 8009  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
 8010  SDLoc DL(SDValue(Node, 0));
 8011
 8012  EVT ShVT = Z.getValueType();
 8013
 8014  // If a funnel shift in the other direction is more supported, use it.
 8015  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
 8016  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
 8017      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
 8018    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
 8019      // fshl X, Y, Z -> fshr X, Y, -Z
 8020      // fshr X, Y, Z -> fshl X, Y, -Z
 8021      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      // NOTE(review): this SUB is created with VT while Zero/Z are
      // ShVT-typed; presumably ShVT == VT on this path — verify.
 8022      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
 8023    } else {
 8024      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
 8025      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
 8026      SDValue One = DAG.getConstant(1, DL, ShVT);
 8027      if (IsFSHL) {
 8028        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
 8029        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
 8030      } else {
 8031        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
 8032        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
 8033      }
 8034      Z = DAG.getNOT(DL, Z, ShVT);
 8035    }
 8036    return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
 8037  }
 8038
 8039  SDValue ShX, ShY;
 8040  SDValue ShAmt, InvShAmt;
 8041  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
 8042    // fshl: X << C | Y >> (BW - C)
 8043    // fshr: X << (BW - C) | Y >> C
 8044    // where C = Z % BW is not zero
 8045    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
 8046    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
 8047    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
 8048    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
 8049    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
 8050  } else {
    // Shift amount may be zero mod BW, so split the "other" shift into
    // a shift by one plus a shift by (BW - 1 - ShAmt) to avoid an
    // out-of-range shift of BW bits.
 8051    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
 8052    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
 8053    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
 8054    if (isPowerOf2_32(BW)) {
 8055      // Z % BW -> Z & (BW - 1)
 8056      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
 8057      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
 8058      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
 8059    } else {
 8060      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
 8061      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
 8062      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
 8063    }
 8064
 8065    SDValue One = DAG.getConstant(1, DL, ShVT);
 8066    if (IsFSHL) {
 8067      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
 8068      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
 8069      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
 8070    } else {
 8071      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
 8072      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
 8073      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
 8074    }
 8075  }
 8076  return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
 8077}
8078
 8079// TODO: Merge with expandFunnelShift.
// Expand ISD::ROTL/ROTR into shift + OR sequences. Prefers the reverse
// rotate when it is better supported; otherwise emits two shifts whose
// amounts are computed with AND masks (power-of-two width) or UREM/SUB.
// NOTE(review): part of the !AllowVectorOps vector-legality condition
// (between the lines numbered 8100 and 8106) is missing from this
// extract — confirm against the full source.
 8080SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
 8081                                 SelectionDAG &DAG) const {
 8082  EVT VT = Node->getValueType(0);
 8083  unsigned EltSizeInBits = VT.getScalarSizeInBits();
 8084  bool IsLeft = Node->getOpcode() == ISD::ROTL;
 8085  SDValue Op0 = Node->getOperand(0);
 8086  SDValue Op1 = Node->getOperand(1);
 8087  SDLoc DL(SDValue(Node, 0));
 8088
 8089  EVT ShVT = Op1.getValueType();
 8090  SDValue Zero = DAG.getConstant(0, DL, ShVT);
 8091
 8092  // If a rotate in the other direction is more supported, use it.
 8093  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
 8094  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
 8095      isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
 8096    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
 8097    return DAG.getNode(RevRot, DL, VT, Op0, Sub);
 8098  }
 8099
 8100  if (!AllowVectorOps && VT.isVector() &&
 8106    return SDValue();
 8107
  // ShOpc performs the rotate direction; HsOpc recovers the bits shifted
  // out the other side.
 8108  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
 8109  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
 8110  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
 8111  SDValue ShVal;
 8112  SDValue HsVal;
 8113  if (isPowerOf2_32(EltSizeInBits)) {
 8114    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
 8115    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
 8116    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
 8117    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
 8118    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
 8119    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
 8120    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
 8121  } else {
    // Non-power-of-two width: shift by one first so the second shift
    // amount (w - 1 - (c % w)) stays in range even when c % w == 0.
 8122    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
 8123    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
 8124    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
 8125    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
 8126    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
 8127    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
 8128    SDValue One = DAG.getConstant(1, DL, ShVT);
 8129    HsVal =
 8130        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
 8131  }
 8132  return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
 8133}
8134
// Expand a multi-part shift (ISD::SHL_PARTS / SRL_PARTS / SRA_PARTS) into
// FSHL/FSHR plus selects that handle shift amounts >= the part width.
// NOTE(review): the signature line of this definition is not visible in
// this extract; Lo and Hi below are presumably reference out-parameters
// declared on that missing line — confirm against the full source.
 8136                                      SelectionDAG &DAG) const {
 8137  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
 8138  EVT VT = Node->getValueType(0);
 8139  unsigned VTBits = VT.getScalarSizeInBits();
 8140  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
 8141
 8142  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
 8143  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
 8144  SDValue ShOpLo = Node->getOperand(0);
 8145  SDValue ShOpHi = Node->getOperand(1);
 8146  SDValue ShAmt = Node->getOperand(2);
 8147  EVT ShAmtVT = ShAmt.getValueType();
 8148  EVT ShAmtCCVT =
 8149      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
 8150  SDLoc dl(Node);
 8151
 8152  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
 8153  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
 8154  // away during isel.
 8155  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
 8156                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // For arithmetic shifts, large amounts must fill with the sign of the
  // high part; otherwise with zero.
 8157  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
 8158                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
 8159                       : DAG.getConstant(0, dl, VT);
 8160
 8161  SDValue Tmp2, Tmp3;
 8162  if (IsSHL) {
 8163    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
 8164    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
 8165  } else {
 8166    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
 8167    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
 8168  }
 8169
 8170  // If the shift amount is larger or equal than the width of a part we don't
 8171  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
 8172  // values for large shift amounts.
 8173  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
 8174                                DAG.getConstant(VTBits, dl, ShAmtVT));
 8175  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
 8176                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
 8177
 8178  if (IsSHL) {
 8179    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
 8180    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
 8181  } else {
 8182    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
 8183    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
 8184  }
 8185}
8186
// Expand an FP-to-signed-integer conversion in software, currently only
// for f32 -> i64, using the bit-level algorithm of compiler-rt's fixsfdi.
// Writes the expansion into Result and returns true on success; returns
// false for unsupported type pairs and for strict FP opcodes (where the
// expansion would drop mandated traps).
// NOTE(review): the signature line of this definition is not visible in
// this extract; Result is presumably a reference out-parameter declared
// there — confirm against the full source.
 8188                                      SelectionDAG &DAG) const {
 8189  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
 8190  SDValue Src = Node->getOperand(OpNo);
 8191  EVT SrcVT = Src.getValueType();
 8192  EVT DstVT = Node->getValueType(0);
 8193  SDLoc dl(SDValue(Node, 0));
 8194
 8195  // FIXME: Only f32 to i64 conversions are supported.
 8196  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
 8197    return false;
 8198
 8199  if (Node->isStrictFPOpcode())
 8200    // When a NaN is converted to an integer a trap is allowed. We can't
 8201    // use this expansion here because it would eliminate that trap. Other
 8202    // traps are also allowed and cannot be eliminated. See
 8203    // IEEE 754-2008 sec 5.8.
 8204    return false;
 8205
 8206  // Expand f32 -> i64 conversion
 8207  // This algorithm comes from compiler-rt's implementation of fixsfdi:
 8208  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
 8209  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
 8210  EVT IntVT = SrcVT.changeTypeToInteger();
 8211  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
 8212
  // IEEE-754 single-precision field masks: 8 exponent bits above 23
  // mantissa bits, bias 127.
 8213  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
 8214  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
 8215  SDValue Bias = DAG.getConstant(127, dl, IntVT);
 8216  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
 8217  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
 8218  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
 8219
 8220  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
 8221
 8222  SDValue ExponentBits = DAG.getNode(
 8223      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
 8224      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
 8225  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
 8226
  // Arithmetic shift of the sign bit yields all-ones (negative) or
  // all-zeros (non-negative), used below to negate via xor/sub.
 8227  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
 8228                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
 8229                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
 8230  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
 8231
  // Re-attach the implicit leading 1 of the normalized mantissa.
 8232  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
 8233                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
 8234                          DAG.getConstant(0x00800000, dl, IntVT));
 8235
 8236  R = DAG.getZExtOrTrunc(R, dl, DstVT);
 8237
  // Scale the mantissa left or right depending on whether the exponent
  // exceeds the mantissa width.
 8238  R = DAG.getSelectCC(
 8239      dl, Exponent, ExponentLoBit,
 8240      DAG.getNode(ISD::SHL, dl, DstVT, R,
 8241                  DAG.getZExtOrTrunc(
 8242                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
 8243                      dl, IntShVT)),
 8244      DAG.getNode(ISD::SRL, dl, DstVT, R,
 8245                  DAG.getZExtOrTrunc(
 8246                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
 8247                      dl, IntShVT)),
 8248      ISD::SETGT);
 8249
  // (R ^ Sign) - Sign conditionally negates R when the input was negative.
 8250  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
 8251                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
 8252
  // Inputs with magnitude < 1 (negative unbiased exponent) truncate to 0.
 8253  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
 8254                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
 8255  return true;
 8256}
8257
// Expand an FP-to-unsigned-integer conversion in terms of FP_TO_SINT by
// offsetting values at or above the destination's sign-mask. Writes the
// expansion into Result (and Chain for strict FP) and returns true on
// success.
// NOTE(review): the signature name line and several interior lines
// (between the numbered lines 8273/8275, 8284/8286 and 8296/8298) are
// missing from this extract — confirm against the full source.
 8259                                      SDValue &Chain,
 8260                                      SelectionDAG &DAG) const {
 8261  SDLoc dl(SDValue(Node, 0));
 8262  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
 8263  SDValue Src = Node->getOperand(OpNo);
 8264
 8265  EVT SrcVT = Src.getValueType();
 8266  EVT DstVT = Node->getValueType(0);
 8267  EVT SetCCVT =
 8268      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
 8269  EVT DstSetCCVT =
 8270      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
 8271
 8272  // Only expand vector types if we have the appropriate vector bit operations.
 8273  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
 8275  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
 8277    return false;
 8278
 8279  // If the maximum float value is smaller then the signed integer range,
 8280  // the destination signmask can't be represented by the float, so we can
 8281  // just use FP_TO_SINT directly.
 8282  const fltSemantics &APFSem = SrcVT.getFltSemantics();
 8283  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
 8284  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
 8286      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
 8287    if (Node->isStrictFPOpcode()) {
 8288      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
 8289                           { Node->getOperand(0), Src });
 8290      Chain = Result.getValue(1);
 8291    } else
 8292      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
 8293    return true;
 8294  }
 8295
 8296  // Don't expand it if there isn't cheap fsub instruction.
 8298          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
 8299    return false;
 8300
  // Cst is the destination sign-mask rendered in the source FP type;
  // Sel is true when Src is below it (fits in the signed range directly).
 8301  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
 8302  SDValue Sel;
 8303
 8304  if (Node->isStrictFPOpcode()) {
 8305    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
 8306                       Node->getOperand(0), /*IsSignaling*/ true);
 8307    Chain = Sel.getValue(1);
 8308  } else {
 8309    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
 8310  }
 8311
 8312  bool Strict = Node->isStrictFPOpcode() ||
 8313                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
 8314
 8315  if (Strict) {
 8316    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
 8317    // signmask then offset (the result of which should be fully representable).
 8318    // Sel = Src < 0x8000000000000000
 8319    // FltOfs = select Sel, 0, 0x8000000000000000
 8320    // IntOfs = select Sel, 0, 0x8000000000000000
 8321    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
 8322
 8323    // TODO: Should any fast-math-flags be set for the FSUB?
 8324    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
 8325                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
 8326    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
 8327    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
 8328                                   DAG.getConstant(0, dl, DstVT),
 8329                                   DAG.getConstant(SignMask, dl, DstVT));
 8330    SDValue SInt;
 8331    if (Node->isStrictFPOpcode()) {
 8332      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
 8333                                { Chain, Src, FltOfs });
 8334      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
 8335                         { Val.getValue(1), Val });
 8336      Chain = SInt.getValue(1);
 8337    } else {
 8338      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
 8339      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
 8340    }
 8341    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
 8342  } else {
 8343    // Expand based on maximum range of FP_TO_SINT:
 8344    // True = fp_to_sint(Src)
 8345    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
 8346    // Result = select (Src < 0x8000000000000000), True, False
 8347
 8348    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
 8349    // TODO: Should any fast-math-flags be set for the FSUB?
 8350    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
 8351                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
 8352    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
 8353                        DAG.getConstant(SignMask, dl, DstVT));
 8354    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
 8355    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
 8356  }
 8357  return true;
 8358}
8359
// Expand an unsigned-i64 to f64 conversion following __floatundidf from
// compiler-rt: split the source into 32-bit halves, bias each into the
// double's mantissa range via magic exponents, and recombine with
// FSUB/FADD. Writes the expansion into Result and returns true; returns
// false for other type pairs and under strict FP (see comment below).
// NOTE(review): the signature name line and part of the vector-legality
// condition (between the numbered lines 8377 and 8382) are missing from
// this extract — confirm against the full source.
 8361                                      SDValue &Chain,
 8362                                      SelectionDAG &DAG) const {
 8363  // This transform is not correct for converting 0 when rounding mode is set
 8364  // to round toward negative infinity which will produce -0.0. So disable under
 8365  // strictfp.
 8366  if (Node->isStrictFPOpcode())
 8367    return false;
 8368
 8369  SDValue Src = Node->getOperand(0);
 8370  EVT SrcVT = Src.getValueType();
 8371  EVT DstVT = Node->getValueType(0);
 8372
 8373  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
 8374    return false;
 8375
 8376  // Only expand vector types if we have the appropriate vector bit operations.
 8377  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
 8382    return false;
 8383
 8384  SDLoc dl(SDValue(Node, 0));
 8385  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
 8386
 8387  // Implementation of unsigned i64 to f64 following the algorithm in
 8388  // __floatundidf in compiler_rt. This implementation performs rounding
 8389  // correctly in all rounding modes with the exception of converting 0
 8390  // when rounding toward negative infinity. In that case the fsub will produce
 8391  // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
  // 0x433... = 2^52 exponent bits; 0x453... = 2^84 exponent bits. OR-ing
  // the integer halves under these exponents makes them exact doubles.
 8392  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
 8393  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
 8394      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
 8395  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
 8396  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
 8397  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
 8398
 8399  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
 8400  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
 8401  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
 8402  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
 8403  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
 8404  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
 8405  SDValue HiSub =
 8406      DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
 8407  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
 8408  return true;
 8409}
8410
 8411SDValue
// Lower FMINNUM/FMAXNUM (and their strict variants) to a compare+select
// when the node is known NaN-free; otherwise returns SDValue() so the
// caller must use another expansion.
// NOTE(review): the signature name line is missing from this extract —
// confirm against the full source.
 8413                                               SelectionDAG &DAG) const {
 8414  unsigned Opcode = Node->getOpcode();
 8415  assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
 8416          Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
 8417         "Wrong opcode");
 8418
 8419  if (Node->getFlags().hasNoNaNs()) {
 8420    ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
 8421    SDValue Op1 = Node->getOperand(0);
 8422    SDValue Op2 = Node->getOperand(1);
 8423    SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
 8424    // Copy FMF flags, but always set the no-signed-zeros flag
 8425    // as this is implied by the FMINNUM/FMAXNUM semantics.
 8426    SDNodeFlags Flags = Node->getFlags();
 8427    Flags.setNoSignedZeros(true);
 8428    SelCC->setFlags(Flags);
 8429    return SelCC;
 8430  }
 8431
 8432  return SDValue();
 8433}
8434
// Expand FMINNUM/FMAXNUM: prefer the *_IEEE form (quieting sNaN inputs
// with FCANONICALIZE first), then FMINIMUM/FMAXIMUM when NaNs and
// conflicting zeros are excluded, then a plain compare+select; returns
// SDValue() if no strategy applies.
// NOTE(review): the signature name line and the lines between the
// numbered lines 8438/8440 and 8442/8444 are missing from this
// extract — confirm against the full source.
 8436                                              SelectionDAG &DAG) const {
 8437  SDLoc dl(Node);
 8438  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
 8440  EVT VT = Node->getValueType(0);
 8441
 8442  if (VT.isScalableVector())
 8444        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
 8445
 8446  if (isOperationLegalOrCustom(NewOp, VT)) {
 8447    SDValue Quiet0 = Node->getOperand(0);
 8448    SDValue Quiet1 = Node->getOperand(1);
 8449
 8450    if (!Node->getFlags().hasNoNaNs()) {
 8451      // Insert canonicalizes if it's possible we need to quiet to get correct
 8452      // sNaN behavior.
 8453      if (!DAG.isKnownNeverSNaN(Quiet0)) {
 8454        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
 8455                             Node->getFlags());
 8456      }
 8457      if (!DAG.isKnownNeverSNaN(Quiet1)) {
 8458        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
 8459                             Node->getFlags());
 8460      }
 8461    }
 8462
 8463    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
 8464  }
 8465
 8466  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
 8467  // instead if there are no NaNs and there can't be an incompatible zero
 8468  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
 8469  if ((Node->getFlags().hasNoNaNs() ||
 8470       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
 8471        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
 8472      (Node->getFlags().hasNoSignedZeros() ||
 8473       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
 8474       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
 8475    unsigned IEEE2018Op =
 8476        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
 8477    if (isOperationLegalOrCustom(IEEE2018Op, VT))
 8478      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
 8479                         Node->getOperand(1), Node->getFlags());
 8480  }
 8481
 8482  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
 8483    return SelCC;
 8484
 8485  return SDValue();
 8486}
8487
// Expand FMINIMUM/FMAXIMUM (IEEE-754-2019 semantics: NaN propagates,
// -0.0 < +0.0) into a non-NaN-propagating min/max followed by explicit
// NaN-propagation and signed-zero fixup selects.
// NOTE(review): the signature name line and the lines between the
// numbered lines 8500/8502, 8514/8516, 8527/8529 and 8534/8536 are
// missing from this extract — confirm against the full source.
 8489                                          SelectionDAG &DAG) const {
 8490  SDLoc DL(N);
 8491  SDValue LHS = N->getOperand(0);
 8492  SDValue RHS = N->getOperand(1);
 8493  unsigned Opc = N->getOpcode();
 8494  EVT VT = N->getValueType(0);
 8495  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 8496  bool IsMax = Opc == ISD::FMAXIMUM;
 8497  SDNodeFlags Flags = N->getFlags();
 8498
 8499  // First, implement comparison not propagating NaN. If no native fmin or fmax
 8500  // available, use plain select with setcc instead.
 8502  unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
 8503  unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
 8504
 8505  // FIXME: We should probably define fminnum/fmaxnum variants with correct
 8506  // signed zero behavior.
 8507  bool MinMaxMustRespectOrderedZero = false;
 8508
 8509  if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
 8510    MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS, Flags);
 8511    MinMaxMustRespectOrderedZero = true;
 8512  } else if (isOperationLegalOrCustom(CompOpc, VT)) {
 8513    MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS, Flags);
 8514  } else {
 8516      return DAG.UnrollVectorOp(N);
 8517
 8518    // NaN (if exists) will be propagated later, so orderness doesn't matter.
 8519    SDValue Compare =
 8520        DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETOGT : ISD::SETOLT);
 8521    MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS, Flags);
 8522  }
 8523
 8524  // Propagate any NaN of both operands
 8525  if (!N->getFlags().hasNoNaNs() &&
 8526      (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
 8527    ConstantFP *FPNaN = ConstantFP::get(*DAG.getContext(),
 8529    MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
 8530                           DAG.getConstantFP(*FPNaN, DL, VT), MinMax, Flags);
 8531  }
 8532
 8533  // fminimum/fmaximum requires -0.0 less than +0.0
 8534  if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
 8536    SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
 8537                                  DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
    // If the result is a zero, pick whichever operand is the "preferred"
    // zero (+0 for max, -0 for min) using IS_FPCLASS tests.
 8538    SDValue TestZero =
 8539        DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
 8540    SDValue LCmp = DAG.getSelect(
 8541        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
 8542        MinMax, Flags);
 8543    SDValue RCmp = DAG.getSelect(
 8544        DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
 8545        LCmp, Flags);
 8546    MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
 8547  }
 8548
 8549  return MinMax;
 8550}
8551
// Expand FMINIMUMNUM/FMAXIMUMNUM (NaN is ignored unless both operands
// are NaN; -0.0 < +0.0): try progressively weaker native ops
// (*_IEEE with sNaN quieting, FMINIMUM/FMAXIMUM when NaN-free, FMINNUM/
// FMAXNUM when sNaN-free and zero-compatible), then fall back to
// selects with explicit NaN-override, quieting and signed-zero fixup.
// NOTE(review): the signature name line and the lines between the
// numbered lines 8564/8566, 8586/8588, 8593/8594(?) and 8623/8625 are
// missing from this extract — confirm against the full source.
 8553                                                  SelectionDAG &DAG) const {
 8554  SDLoc DL(Node);
 8555  SDValue LHS = Node->getOperand(0);
 8556  SDValue RHS = Node->getOperand(1);
 8557  unsigned Opc = Node->getOpcode();
 8558  EVT VT = Node->getValueType(0);
 8559  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 8560  bool IsMax = Opc == ISD::FMAXIMUMNUM;
 8561  const TargetOptions &Options = DAG.getTarget().Options;
 8562  SDNodeFlags Flags = Node->getFlags();
 8563
 8564  unsigned NewOp =
 8566
 8567  if (isOperationLegalOrCustom(NewOp, VT)) {
 8568    if (!Flags.hasNoNaNs()) {
 8569      // Insert canonicalizes if it's possible we need to quiet to get correct
 8570      // sNaN behavior.
 8571      if (!DAG.isKnownNeverSNaN(LHS)) {
 8572        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
 8573      }
 8574      if (!DAG.isKnownNeverSNaN(RHS)) {
 8575        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
 8576      }
 8577    }
 8578
 8579    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
 8580  }
 8581
 8582  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
 8583  // same behaviors for all of other cases: +0.0 vs -0.0 included.
 8584  if (Flags.hasNoNaNs() ||
 8585      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
 8586    unsigned IEEE2019Op =
 8588    if (isOperationLegalOrCustom(IEEE2019Op, VT))
 8589      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
 8590  }
 8591
 8592  // FMINNUM/FMAXMUM returns qNaN if either operand is sNaN, and it may return
 8593  // either one for +0.0 vs -0.0.
 8594  if ((Flags.hasNoNaNs() ||
 8595       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
 8596      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
 8597       DAG.isKnownNeverZeroFloat(RHS))) {
 8598    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
 8599    if (isOperationLegalOrCustom(IEEE2008Op, VT))
 8600      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
 8601  }
 8602
 8603  // If only one operand is NaN, override it with another operand.
 8604  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
 8605    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
 8606  }
 8607  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
 8608    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
 8609  }
 8610
 8611  SDValue MinMax =
 8612      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
 8613  // If MinMax is NaN, let's quiet it.
 8614  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
 8615      !DAG.isKnownNeverNaN(RHS)) {
 8616    SDValue MinMaxQuiet =
 8617        DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
 8618    MinMax =
 8619        DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);
 8620  }
 8621
 8622  // Fixup signed zero behavior.
 8623  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
 8625    return MinMax;
 8626  }
  // The result compared equal to 0.0: pick the operand that is the
  // "preferred" zero (+0 for max, -0 for min) via IS_FPCLASS tests.
 8627  SDValue TestZero =
 8628      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
 8629  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
 8630                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
 8631  SDValue LCmp = DAG.getSelect(
 8632      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
 8633      MinMax, Flags);
 8634  SDValue RCmp = DAG.getSelect(
 8635      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
 8636      Flags);
 8637  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
 8638}
8639
8640/// Returns a true value if if this FPClassTest can be performed with an ordered
8641/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8642/// std::nullopt if it cannot be performed as a compare with 0.
8643static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8644 const fltSemantics &Semantics,
8645 const MachineFunction &MF) {
8646 FPClassTest OrderedMask = Test & ~fcNan;
8647 FPClassTest NanTest = Test & fcNan;
8648 bool IsOrdered = NanTest == fcNone;
8649 bool IsUnordered = NanTest == fcNan;
8650
8651 // Skip cases that are testing for only a qnan or snan.
8652 if (!IsOrdered && !IsUnordered)
8653 return std::nullopt;
8654
8655 if (OrderedMask == fcZero &&
8656 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8657 return IsOrdered;
8658 if (OrderedMask == (fcZero | fcSubnormal) &&
8659 MF.getDenormalMode(Semantics).inputsAreZero())
8660 return IsOrdered;
8661 return std::nullopt;
8662}
8663
8665 const FPClassTest OrigTestMask,
8666 SDNodeFlags Flags, const SDLoc &DL,
8667 SelectionDAG &DAG) const {
8668 EVT OperandVT = Op.getValueType();
8669 assert(OperandVT.isFloatingPoint());
8670 FPClassTest Test = OrigTestMask;
8671
8672 // Degenerated cases.
8673 if (Test == fcNone)
8674 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8675 if ((Test & fcAllFlags) == fcAllFlags)
8676 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8677
8678 // PPC double double is a pair of doubles, of which the higher part determines
8679 // the value class.
8680 if (OperandVT == MVT::ppcf128) {
8681 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8682 DAG.getConstant(1, DL, MVT::i32));
8683 OperandVT = MVT::f64;
8684 }
8685
8686 // Some checks may be represented as inversion of simpler check, for example
8687 // "inf|normal|subnormal|zero" => !"nan".
8688 bool IsInverted = false;
8689 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8690 IsInverted = true;
8691 Test = InvertedCheck;
8692 }
8693
8694 // Floating-point type properties.
8695 EVT ScalarFloatVT = OperandVT.getScalarType();
8696 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8697 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8698 bool IsF80 = (ScalarFloatVT == MVT::f80);
8699
8700 // Some checks can be implemented using float comparisons, if floating point
8701 // exceptions are ignored.
8702 if (Flags.hasNoFPExcept() &&
8704 FPClassTest FPTestMask = Test;
8705
8706 ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8707 ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8708
8709 // See if we can fold an | fcNan into an unordered compare.
8710 FPClassTest OrderedFPTestMask = FPTestMask & ~fcNan;
8711
8712 // Can't fold the ordered check if we're only testing for snan or qnan
8713 // individually.
8714 if ((FPTestMask & fcNan) != fcNan)
8715 OrderedFPTestMask = FPTestMask;
8716
8717 const bool IsOrdered = FPTestMask == OrderedFPTestMask;
8718
8719 if (std::optional<bool> IsCmp0 =
8720 isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8721 IsCmp0 && (isCondCodeLegalOrCustom(
8722 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8723 OperandVT.getScalarType().getSimpleVT()))) {
8724
8725 // If denormals could be implicitly treated as 0, this is not equivalent
8726 // to a compare with 0 since it will also be true for denormals.
8727 return DAG.getSetCC(DL, ResultVT, Op,
8728 DAG.getConstantFP(0.0, DL, OperandVT),
8729 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8730 }
8731
8732 if (Test == fcNan &&
8734 OperandVT.getScalarType().getSimpleVT())) {
8735 return DAG.getSetCC(DL, ResultVT, Op, Op,
8736 IsInverted ? ISD::SETO : ISD::SETUO);
8737 }
8738
8739 if (Test == fcInf &&
8741 OperandVT.getScalarType().getSimpleVT()) &&
8743 // isinf(x) --> fabs(x) == inf
8744 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8745 SDValue Inf =
8746 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8747 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8748 IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8749 }
8750
8751 if (OrderedFPTestMask == (fcSubnormal | fcZero) && !IsOrdered) {
8752 // TODO: Could handle ordered case, but it produces worse code for
8753 // x86. Maybe handle ordered if fabs is free?
8754
8755 ISD::CondCode OrderedOp = IsInverted ? ISD::SETUGE : ISD::SETOLT;
8756 ISD::CondCode UnorderedOp = IsInverted ? ISD::SETOGE : ISD::SETULT;
8757
8758 if (isCondCodeLegalOrCustom(IsOrdered ? OrderedOp : UnorderedOp,
8759 OperandVT.getScalarType().getSimpleVT())) {
8760 // (issubnormal(x) || iszero(x)) --> fabs(x) < smallest_normal
8761
8762 // TODO: Maybe only makes sense if fabs is free. Integer test of
8763 // exponent bits seems better for x86.
8764 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8765 SDValue SmallestNormal = DAG.getConstantFP(
8766 APFloat::getSmallestNormalized(Semantics), DL, OperandVT);
8767 return DAG.getSetCC(DL, ResultVT, Abs, SmallestNormal,
8768 IsOrdered ? OrderedOp : UnorderedOp);
8769 }
8770 }
8771 }
8772
8773 // In the general case use integer operations.
8774 unsigned BitSize = OperandVT.getScalarSizeInBits();
8775 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8776 if (OperandVT.isVector())
8777 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8778 OperandVT.getVectorElementCount());
8779 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8780
8781 // Various masks.
8782 APInt SignBit = APInt::getSignMask(BitSize);
8783 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8784 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8785 const unsigned ExplicitIntBitInF80 = 63;
8786 APInt ExpMask = Inf;
8787 if (IsF80)
8788 ExpMask.clearBit(ExplicitIntBitInF80);
8789 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8790 APInt QNaNBitMask =
8791 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8792 APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
8793
8794 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8795 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8796 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8797 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8798 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8799 SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8800
8801 SDValue Res;
8802 const auto appendResult = [&](SDValue PartialRes) {
8803 if (PartialRes) {
8804 if (Res)
8805 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8806 else
8807 Res = PartialRes;
8808 }
8809 };
8810
8811 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8812 const auto getIntBitIsSet = [&]() -> SDValue {
8813 if (!IntBitIsSetV) {
8814 APInt IntBitMask(BitSize, 0);
8815 IntBitMask.setBit(ExplicitIntBitInF80);
8816 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8817 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8818 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8819 }
8820 return IntBitIsSetV;
8821 };
8822
8823 // Split the value into sign bit and absolute value.
8824 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
8825 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8826 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
8827
8828 // Tests that involve more than one class should be processed first.
8829 SDValue PartialRes;
8830
8831 if (IsF80)
8832 ; // Detect finite numbers of f80 by checking individual classes because
8833 // they have different settings of the explicit integer bit.
8834 else if ((Test & fcFinite) == fcFinite) {
8835 // finite(V) ==> abs(V) < exp_mask
8836 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8837 Test &= ~fcFinite;
8838 } else if ((Test & fcFinite) == fcPosFinite) {
8839 // finite(V) && V > 0 ==> V < exp_mask
8840 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8841 Test &= ~fcPosFinite;
8842 } else if ((Test & fcFinite) == fcNegFinite) {
8843 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8844 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8845 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8846 Test &= ~fcNegFinite;
8847 }
8848 appendResult(PartialRes);
8849
8850 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8851 // fcZero | fcSubnormal => test all exponent bits are 0
8852 // TODO: Handle sign bit specific cases
8853 if (PartialCheck == (fcZero | fcSubnormal)) {
8854 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
8855 SDValue ExpIsZero =
8856 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8857 appendResult(ExpIsZero);
8858 Test &= ~PartialCheck & fcAllFlags;
8859 }
8860 }
8861
8862 // Check for individual classes.
8863
8864 if (unsigned PartialCheck = Test & fcZero) {
8865 if (PartialCheck == fcPosZero)
8866 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
8867 else if (PartialCheck == fcZero)
8868 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8869 else // ISD::fcNegZero
8870 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8871 appendResult(PartialRes);
8872 }
8873
8874 if (unsigned PartialCheck = Test & fcSubnormal) {
8875 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8876 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8877 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8878 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8879 SDValue VMinusOneV =
8880 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8881 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8882 if (PartialCheck == fcNegSubnormal)
8883 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8884 appendResult(PartialRes);
8885 }
8886
8887 if (unsigned PartialCheck = Test & fcInf) {
8888 if (PartialCheck == fcPosInf)
8889 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
8890 else if (PartialCheck == fcInf)
8891 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
8892 else { // ISD::fcNegInf
8893 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8894 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
8895 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
8896 }
8897 appendResult(PartialRes);
8898 }
8899
8900 if (unsigned PartialCheck = Test & fcNan) {
8901 APInt InfWithQnanBit = Inf | QNaNBitMask;
8902 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
8903 if (PartialCheck == fcNan) {
8904 // isnan(V) ==> abs(V) > int(inf)
8905 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8906 if (IsF80) {
8907 // Recognize unsupported values as NaNs for compatibility with glibc.
8908 // In them (exp(V)==0) == int_bit.
8909 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
8910 SDValue ExpIsZero =
8911 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8912 SDValue IsPseudo =
8913 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
8914 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
8915 }
8916 } else if (PartialCheck == fcQNan) {
8917 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8918 PartialRes =
8919 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
8920 } else { // ISD::fcSNan
8921 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8922 // abs(V) < (unsigned(Inf) | quiet_bit)
8923 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8924 SDValue IsNotQnan =
8925 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
8926 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
8927 }
8928 appendResult(PartialRes);
8929 }
8930
8931 if (unsigned PartialCheck = Test & fcNormal) {
8932 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8933 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8934 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
8935 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
8936 APInt ExpLimit = ExpMask - ExpLSB;
8937 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
8938 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
8939 if (PartialCheck == fcNegNormal)
8940 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8941 else if (PartialCheck == fcPosNormal) {
8942 SDValue PosSignV =
8943 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
8944 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
8945 }
8946 if (IsF80)
8947 PartialRes =
8948 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
8949 appendResult(PartialRes);
8950 }
8951
8952 if (!Res)
8953 return DAG.getConstant(IsInverted, DL, ResultVT);
8954 if (IsInverted)
8955 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
8956 return Res;
8957}
8958
8959// Only expand vector types if we have the appropriate vector bit operations.
8960static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8961 assert(VT.isVector() && "Expected vector type");
8962 unsigned Len = VT.getScalarSizeInBits();
8963 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
8966 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
8968}
8969
8971 SDLoc dl(Node);
8972 EVT VT = Node->getValueType(0);
8973 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8974 SDValue Op = Node->getOperand(0);
8975 unsigned Len = VT.getScalarSizeInBits();
8976 assert(VT.isInteger() && "CTPOP not implemented for this type.");
8977
8978 // TODO: Add support for irregular type lengths.
8979 if (!(Len <= 128 && Len % 8 == 0))
8980 return SDValue();
8981
8982 // Only expand vector types if we have the appropriate vector bit operations.
8983 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
8984 return SDValue();
8985
8986 // This is the "best" algorithm from
8987 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8988 SDValue Mask55 =
8989 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8990 SDValue Mask33 =
8991 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8992 SDValue Mask0F =
8993 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8994
8995 // v = v - ((v >> 1) & 0x55555555...)
8996 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
8997 DAG.getNode(ISD::AND, dl, VT,
8998 DAG.getNode(ISD::SRL, dl, VT, Op,
8999 DAG.getConstant(1, dl, ShVT)),
9000 Mask55));
9001 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9002 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
9003 DAG.getNode(ISD::AND, dl, VT,
9004 DAG.getNode(ISD::SRL, dl, VT, Op,
9005 DAG.getConstant(2, dl, ShVT)),
9006 Mask33));
9007 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9008 Op = DAG.getNode(ISD::AND, dl, VT,
9009 DAG.getNode(ISD::ADD, dl, VT, Op,
9010 DAG.getNode(ISD::SRL, dl, VT, Op,
9011 DAG.getConstant(4, dl, ShVT))),
9012 Mask0F);
9013
9014 if (Len <= 8)
9015 return Op;
9016
9017 // Avoid the multiply if we only have 2 bytes to add.
9018 // TODO: Only doing this for scalars because vectors weren't as obviously
9019 // improved.
9020 if (Len == 16 && !VT.isVector()) {
9021 // v = (v + (v >> 8)) & 0x00FF;
9022 return DAG.getNode(ISD::AND, dl, VT,
9023 DAG.getNode(ISD::ADD, dl, VT, Op,
9024 DAG.getNode(ISD::SRL, dl, VT, Op,
9025 DAG.getConstant(8, dl, ShVT))),
9026 DAG.getConstant(0xFF, dl, VT));
9027 }
9028
9029 // v = (v * 0x01010101...) >> (Len - 8)
9030 SDValue V;
9033 SDValue Mask01 =
9034 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9035 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
9036 } else {
9037 V = Op;
9038 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9039 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9040 V = DAG.getNode(ISD::ADD, dl, VT, V,
9041 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
9042 }
9043 }
9044 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
9045}
9046
9048 SDLoc dl(Node);
9049 EVT VT = Node->getValueType(0);
9050 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9051 SDValue Op = Node->getOperand(0);
9052 SDValue Mask = Node->getOperand(1);
9053 SDValue VL = Node->getOperand(2);
9054 unsigned Len = VT.getScalarSizeInBits();
9055 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
9056
9057 // TODO: Add support for irregular type lengths.
9058 if (!(Len <= 128 && Len % 8 == 0))
9059 return SDValue();
9060
9061 // This is same algorithm of expandCTPOP from
9062 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
9063 SDValue Mask55 =
9064 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
9065 SDValue Mask33 =
9066 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
9067 SDValue Mask0F =
9068 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
9069
9070 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
9071
9072 // v = v - ((v >> 1) & 0x55555555...)
9073 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
9074 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9075 DAG.getConstant(1, dl, ShVT), Mask, VL),
9076 Mask55, Mask, VL);
9077 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
9078
9079 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
9080 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
9081 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
9082 DAG.getNode(ISD::VP_SRL, dl, VT, Op,
9083 DAG.getConstant(2, dl, ShVT), Mask, VL),
9084 Mask33, Mask, VL);
9085 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
9086
9087 // v = (v + (v >> 4)) & 0x0F0F0F0F...
9088 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
9089 Mask, VL),
9090 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
9091 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
9092
9093 if (Len <= 8)
9094 return Op;
9095
9096 // v = (v * 0x01010101...) >> (Len - 8)
9097 SDValue V;
9099 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
9100 SDValue Mask01 =
9101 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
9102 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
9103 } else {
9104 V = Op;
9105 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
9106 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
9107 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
9108 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
9109 Mask, VL);
9110 }
9111 }
9112 return DAG.getNode(ISD::VP_SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT),
9113 Mask, VL);
9114}
9115
9117 SDLoc dl(Node);
9118 EVT VT = Node->getValueType(0);
9119 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9120 SDValue Op = Node->getOperand(0);
9121 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9122
9123 // If the non-ZERO_UNDEF version is supported we can use that instead.
9124 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
9126 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
9127
9128 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9130 EVT SetCCVT =
9131 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9132 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
9133 SDValue Zero = DAG.getConstant(0, dl, VT);
9134 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9135 return DAG.getSelect(dl, VT, SrcIsZero,
9136 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
9137 }
9138
9139 // Only expand vector types if we have the appropriate vector bit operations.
9140 // This includes the operations needed to expand CTPOP if it isn't supported.
9141 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9143 !canExpandVectorCTPOP(*this, VT)) ||
9146 return SDValue();
9147
9148 // for now, we do this:
9149 // x = x | (x >> 1);
9150 // x = x | (x >> 2);
9151 // ...
9152 // x = x | (x >>16);
9153 // x = x | (x >>32); // for 64-bit input
9154 // return popcount(~x);
9155 //
9156 // Ref: "Hacker's Delight" by Henry Warren
9157 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9158 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9159 Op = DAG.getNode(ISD::OR, dl, VT, Op,
9160 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
9161 }
9162 Op = DAG.getNOT(dl, Op, VT);
9163 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
9164}
9165
9167 SDLoc dl(Node);
9168 EVT VT = Node->getValueType(0);
9169 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
9170 SDValue Op = Node->getOperand(0);
9171 SDValue Mask = Node->getOperand(1);
9172 SDValue VL = Node->getOperand(2);
9173 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9174
9175 // do this:
9176 // x = x | (x >> 1);
9177 // x = x | (x >> 2);
9178 // ...
9179 // x = x | (x >>16);
9180 // x = x | (x >>32); // for 64-bit input
9181 // return popcount(~x);
9182 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
9183 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
9184 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
9185 DAG.getNode(ISD::VP_SRL, dl, VT, Op, Tmp, Mask, VL), Mask,
9186 VL);
9187 }
9188 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getAllOnesConstant(dl, VT),
9189 Mask, VL);
9190 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
9191}
9192
9194 const SDLoc &DL, EVT VT, SDValue Op,
9195 unsigned BitWidth) const {
9196 if (BitWidth != 32 && BitWidth != 64)
9197 return SDValue();
9198 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
9199 : APInt(64, 0x0218A392CD3D5DBFULL);
9200 const DataLayout &TD = DAG.getDataLayout();
9201 MachinePointerInfo PtrInfo =
9203 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
9204 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
9205 SDValue Lookup = DAG.getNode(
9206 ISD::SRL, DL, VT,
9207 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
9208 DAG.getConstant(DeBruijn, DL, VT)),
9209 DAG.getConstant(ShiftAmt, DL, VT));
9211
9213 for (unsigned i = 0; i < BitWidth; i++) {
9214 APInt Shl = DeBruijn.shl(i);
9215 APInt Lshr = Shl.lshr(ShiftAmt);
9216 Table[Lshr.getZExtValue()] = i;
9217 }
9218
9219 // Create a ConstantArray in Constant Pool
9220 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
9221 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
9222 TD.getPrefTypeAlign(CA->getType()));
9223 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
9224 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9225 PtrInfo, MVT::i8);
9226 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9227 return ExtLoad;
9228
9229 EVT SetCCVT =
9230 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9231 SDValue Zero = DAG.getConstant(0, DL, VT);
9232 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9233 return DAG.getSelect(DL, VT, SrcIsZero,
9234 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9235}
9236
9238 SDLoc dl(Node);
9239 EVT VT = Node->getValueType(0);
9240 SDValue Op = Node->getOperand(0);
9241 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9242
9243 // If the non-ZERO_UNDEF version is supported we can use that instead.
9244 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9246 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9247
9248 // If the ZERO_UNDEF version is supported use that and handle the zero case.
9250 EVT SetCCVT =
9251 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9252 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9253 SDValue Zero = DAG.getConstant(0, dl, VT);
9254 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
9255 return DAG.getSelect(dl, VT, SrcIsZero,
9256 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9257 }
9258
9259 // Only expand vector types if we have the appropriate vector bit operations.
9260 // This includes the operations needed to expand CTPOP if it isn't supported.
9261 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9264 !canExpandVectorCTPOP(*this, VT)) ||
9268 return SDValue();
9269
9270 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9271 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9273 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9274 return V;
9275
9276 // for now, we use: { return popcount(~x & (x - 1)); }
9277 // unless the target has ctlz but not ctpop, in which case we use:
9278 // { return 32 - nlz(~x & (x-1)); }
9279 // Ref: "Hacker's Delight" by Henry Warren
9280 SDValue Tmp = DAG.getNode(
9281 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9282 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9283
9284 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
9286 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9287 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9288 }
9289
9290 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9291}
9292
9294 SDValue Op = Node->getOperand(0);
9295 SDValue Mask = Node->getOperand(1);
9296 SDValue VL = Node->getOperand(2);
9297 SDLoc dl(Node);
9298 EVT VT = Node->getValueType(0);
9299
9300 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9301 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9302 DAG.getAllOnesConstant(dl, VT), Mask, VL);
9303 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9304 DAG.getConstant(1, dl, VT), Mask, VL);
9305 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9306 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9307}
9308
9310 SelectionDAG &DAG) const {
9311 // %cond = to_bool_vec %source
9312 // %splat = splat /*val=*/VL
9313 // %tz = step_vector
9314 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9315 // %r = vp.reduce.umin %v
9316 SDLoc DL(N);
9317 SDValue Source = N->getOperand(0);
9318 SDValue Mask = N->getOperand(1);
9319 SDValue EVL = N->getOperand(2);
9320 EVT SrcVT = Source.getValueType();
9321 EVT ResVT = N->getValueType(0);
9322 EVT ResVecVT =
9323 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9324
9325 // Convert to boolean vector.
9326 if (SrcVT.getScalarType() != MVT::i1) {
9327 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9328 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9329 SrcVT.getVectorElementCount());
9330 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9331 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9332 }
9333
9334 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9335 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9336 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9337 SDValue Select =
9338 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9339 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9340}
9341
9343 bool IsNegative) const {
9344 SDLoc dl(N);
9345 EVT VT = N->getValueType(0);
9346 SDValue Op = N->getOperand(0);
9347
9348 // abs(x) -> smax(x,sub(0,x))
9349 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9351 SDValue Zero = DAG.getConstant(0, dl, VT);
9352 Op = DAG.getFreeze(Op);
9353 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9354 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9355 }
9356
9357 // abs(x) -> umin(x,sub(0,x))
9358 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9360 SDValue Zero = DAG.getConstant(0, dl, VT);
9361 Op = DAG.getFreeze(Op);
9362 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9363 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9364 }
9365
9366 // 0 - abs(x) -> smin(x, sub(0,x))
9367 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9369 SDValue Zero = DAG.getConstant(0, dl, VT);
9370 Op = DAG.getFreeze(Op);
9371 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9372 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9373 }
9374
9375 // Only expand vector types if we have the appropriate vector operations.
9376 if (VT.isVector() &&
9378 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9379 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9381 return SDValue();
9382
9383 Op = DAG.getFreeze(Op);
9384 SDValue Shift = DAG.getNode(
9385 ISD::SRA, dl, VT, Op,
9386 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9387 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9388
9389 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9390 if (!IsNegative)
9391 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9392
9393 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9394 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9395}
9396
9398 SDLoc dl(N);
9399 EVT VT = N->getValueType(0);
9400 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9401 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9402 bool IsSigned = N->getOpcode() == ISD::ABDS;
9403
9404 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9405 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9406 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9407 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9408 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9409 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9410 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9411 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9412 }
9413
9414 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9415 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9416 return DAG.getNode(ISD::OR, dl, VT,
9417 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9418 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9419
9420 // If the subtract doesn't overflow then just use abs(sub())
9421 // NOTE: don't use frozen operands for value tracking.
9422 bool IsNonNegative = DAG.SignBitIsZero(N->getOperand(1)) &&
9423 DAG.SignBitIsZero(N->getOperand(0));
9424
9425 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(0),
9426 N->getOperand(1)))
9427 return DAG.getNode(ISD::ABS, dl, VT,
9428 DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9429
9430 if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, N->getOperand(1),
9431 N->getOperand(0)))
9432 return DAG.getNode(ISD::ABS, dl, VT,
9433 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9434
9435 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9437 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9438
9439 // Branchless expansion iff cmp result is allbits:
9440 // abds(lhs, rhs) -> sub(sgt(lhs, rhs), xor(sgt(lhs, rhs), sub(lhs, rhs)))
9441 // abdu(lhs, rhs) -> sub(ugt(lhs, rhs), xor(ugt(lhs, rhs), sub(lhs, rhs)))
9442 if (CCVT == VT && getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9443 SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9444 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Diff, Cmp);
9445 return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
9446 }
9447
9448 // Similar to the branchless expansion, use the (sign-extended) usubo overflow
9449 // flag if the (scalar) type is illegal as this is more likely to legalize
9450 // cleanly:
9451 // abdu(lhs, rhs) -> sub(xor(sub(lhs, rhs), uof(lhs, rhs)), uof(lhs, rhs))
9452 if (!IsSigned && VT.isScalarInteger() && !isTypeLegal(VT)) {
9453 SDValue USubO =
9454 DAG.getNode(ISD::USUBO, dl, DAG.getVTList(VT, MVT::i1), {LHS, RHS});
9455 SDValue Cmp = DAG.getNode(ISD::SIGN_EXTEND, dl, VT, USubO.getValue(1));
9456 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, USubO.getValue(0), Cmp);
9457 return DAG.getNode(ISD::SUB, dl, VT, Xor, Cmp);
9458 }
9459
9460 // FIXME: Should really try to split the vector in case it's legal on a
9461 // subvector.
9463 return DAG.UnrollVectorOp(N);
9464
9465 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9466 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9467 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9468 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9469}
9470
9472 SDLoc dl(N);
9473 EVT VT = N->getValueType(0);
9474 SDValue LHS = N->getOperand(0);
9475 SDValue RHS = N->getOperand(1);
9476
9477 unsigned Opc = N->getOpcode();
9478 bool IsFloor = Opc == ISD::AVGFLOORS || Opc == ISD::AVGFLOORU;
9479 bool IsSigned = Opc == ISD::AVGCEILS || Opc == ISD::AVGFLOORS;
9480 unsigned SumOpc = IsFloor ? ISD::ADD : ISD::SUB;
9481 unsigned SignOpc = IsFloor ? ISD::AND : ISD::OR;
9482 unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
9483 unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9484 assert((Opc == ISD::AVGFLOORS || Opc == ISD::AVGCEILS ||
9485 Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
9486 "Unknown AVG node");
9487
9488 // If the operands are already extended, we can add+shift.
9489 bool IsExt =
9490 (IsSigned && DAG.ComputeNumSignBits(LHS) >= 2 &&
9491 DAG.ComputeNumSignBits(RHS) >= 2) ||
9492 (!IsSigned && DAG.computeKnownBits(LHS).countMinLeadingZeros() >= 1 &&
9494 if (IsExt) {
9495 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
9496 if (!IsFloor)
9497 Sum = DAG.getNode(ISD::ADD, dl, VT, Sum, DAG.getConstant(1, dl, VT));
9498 return DAG.getNode(ShiftOpc, dl, VT, Sum,
9499 DAG.getShiftAmountConstant(1, VT, dl));
9500 }
9501
9502 // For scalars, see if we can efficiently extend/truncate to use add+shift.
9503 if (VT.isScalarInteger()) {
9504 unsigned BW = VT.getScalarSizeInBits();
9505 EVT ExtVT = VT.getIntegerVT(*DAG.getContext(), 2 * BW);
9506 if (isTypeLegal(ExtVT) && isTruncateFree(ExtVT, VT)) {
9507 LHS = DAG.getNode(ExtOpc, dl, ExtVT, LHS);
9508 RHS = DAG.getNode(ExtOpc, dl, ExtVT, RHS);
9509 SDValue Avg = DAG.getNode(ISD::ADD, dl, ExtVT, LHS, RHS);
9510 if (!IsFloor)
9511 Avg = DAG.getNode(ISD::ADD, dl, ExtVT, Avg,
9512 DAG.getConstant(1, dl, ExtVT));
9513 // Just use SRL as we will be truncating away the extended sign bits.
9514 Avg = DAG.getNode(ISD::SRL, dl, ExtVT, Avg,
9515 DAG.getShiftAmountConstant(1, ExtVT, dl));
9516 return DAG.getNode(ISD::TRUNCATE, dl, VT, Avg);
9517 }
9518 }
9519
9520 // avgflooru(lhs, rhs) -> or(lshr(add(lhs, rhs),1),shl(overflow, typesize-1))
9521 if (Opc == ISD::AVGFLOORU && VT.isScalarInteger() && !isTypeLegal(VT)) {
9522 SDValue UAddWithOverflow =
9523 DAG.getNode(ISD::UADDO, dl, DAG.getVTList(VT, MVT::i1), {RHS, LHS});
9524
9525 SDValue Sum = UAddWithOverflow.getValue(0);
9526 SDValue Overflow = UAddWithOverflow.getValue(1);
9527
9528 // Right shift the sum by 1
9529 SDValue One = DAG.getShiftAmountConstant(1, VT, dl);
9530 SDValue LShrVal = DAG.getNode(ISD::SRL, dl, VT, Sum, One);
9531
9532 SDValue ZeroExtOverflow = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Overflow);
9533 SDValue OverflowShl =
9534 DAG.getNode(ISD::SHL, dl, VT, ZeroExtOverflow,
9535 DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
9536
9537 return DAG.getNode(ISD::OR, dl, VT, LShrVal, OverflowShl);
9538 }
9539
9540 // avgceils(lhs, rhs) -> sub(or(lhs,rhs),ashr(xor(lhs,rhs),1))
9541 // avgceilu(lhs, rhs) -> sub(or(lhs,rhs),lshr(xor(lhs,rhs),1))
9542 // avgfloors(lhs, rhs) -> add(and(lhs,rhs),ashr(xor(lhs,rhs),1))
9543 // avgflooru(lhs, rhs) -> add(and(lhs,rhs),lshr(xor(lhs,rhs),1))
9544 LHS = DAG.getFreeze(LHS);
9545 RHS = DAG.getFreeze(RHS);
9546 SDValue Sign = DAG.getNode(SignOpc, dl, VT, LHS, RHS);
9547 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9548 SDValue Shift =
9549 DAG.getNode(ShiftOpc, dl, VT, Xor, DAG.getShiftAmountConstant(1, VT, dl));
9550 return DAG.getNode(SumOpc, dl, VT, Sign, Shift);
9551}
9552
9554 SDLoc dl(N);
9555 EVT VT = N->getValueType(0);
9556 SDValue Op = N->getOperand(0);
9557
9558 if (!VT.isSimple())
9559 return SDValue();
9560
9561 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9562 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9563 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9564 default:
9565 return SDValue();
9566 case MVT::i16:
9567 // Use a rotate by 8. This can be further expanded if necessary.
9568 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9569 case MVT::i32:
9570 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9571 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9572 DAG.getConstant(0xFF00, dl, VT));
9573 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9574 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9575 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9576 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9577 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9578 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9579 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9580 case MVT::i64:
9581 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9582 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9583 DAG.getConstant(255ULL<<8, dl, VT));
9584 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9585 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9586 DAG.getConstant(255ULL<<16, dl, VT));
9587 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9588 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9589 DAG.getConstant(255ULL<<24, dl, VT));
9590 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9591 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9592 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9593 DAG.getConstant(255ULL<<24, dl, VT));
9594 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9595 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9596 DAG.getConstant(255ULL<<16, dl, VT));
9597 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9598 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9599 DAG.getConstant(255ULL<<8, dl, VT));
9600 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9601 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9602 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9603 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9604 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9605 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9606 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9607 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9608 }
9609}
9610
9612 SDLoc dl(N);
9613 EVT VT = N->getValueType(0);
9614 SDValue Op = N->getOperand(0);
9615 SDValue Mask = N->getOperand(1);
9616 SDValue EVL = N->getOperand(2);
9617
9618 if (!VT.isSimple())
9619 return SDValue();
9620
9621 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9622 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9623 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9624 default:
9625 return SDValue();
9626 case MVT::i16:
9627 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9628 Mask, EVL);
9629 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9630 Mask, EVL);
9631 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9632 case MVT::i32:
9633 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9634 Mask, EVL);
9635 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9636 Mask, EVL);
9637 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9638 Mask, EVL);
9639 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9640 Mask, EVL);
9641 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9642 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9643 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9644 Mask, EVL);
9645 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9646 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9647 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9648 case MVT::i64:
9649 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9650 Mask, EVL);
9651 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9652 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9653 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9654 Mask, EVL);
9655 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9656 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9657 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9658 Mask, EVL);
9659 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9660 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9661 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9662 Mask, EVL);
9663 Tmp4 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9664 Mask, EVL);
9665 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9666 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9667 Tmp3 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9668 Mask, EVL);
9669 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9670 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9671 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9672 Mask, EVL);
9673 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9674 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9675 Tmp1 = DAG.getNode(ISD::VP_SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9676 Mask, EVL);
9677 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9678 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9679 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9680 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9681 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9682 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9683 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9684 }
9685}
9686
9688 SDLoc dl(N);
9689 EVT VT = N->getValueType(0);
9690 SDValue Op = N->getOperand(0);
9691 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9692 unsigned Sz = VT.getScalarSizeInBits();
9693
9694 SDValue Tmp, Tmp2, Tmp3;
9695
9696 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9697 // and finally the i1 pairs.
9698 // TODO: We can easily support i4/i2 legal types if any target ever does.
9699 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9700 // Create the masks - repeating the pattern every byte.
9701 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9702 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9703 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9704
9705 // BSWAP if the type is wider than a single byte.
9706 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
9707
9708 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9709 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9710 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9711 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9712 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9713 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9714
9715 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9716 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9717 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9718 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9719 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9720 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9721
9722 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9723 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9724 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9725 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9726 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9727 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9728 return Tmp;
9729 }
9730
9731 Tmp = DAG.getConstant(0, dl, VT);
9732 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9733 if (I < J)
9734 Tmp2 =
9735 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
9736 else
9737 Tmp2 =
9738 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
9739
9740 APInt Shift = APInt::getOneBitSet(Sz, J);
9741 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9742 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9743 }
9744
9745 return Tmp;
9746}
9747
9749 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9750
9751 SDLoc dl(N);
9752 EVT VT = N->getValueType(0);
9753 SDValue Op = N->getOperand(0);
9754 SDValue Mask = N->getOperand(1);
9755 SDValue EVL = N->getOperand(2);
9756 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9757 unsigned Sz = VT.getScalarSizeInBits();
9758
9759 SDValue Tmp, Tmp2, Tmp3;
9760
9761 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9762 // and finally the i1 pairs.
9763 // TODO: We can easily support i4/i2 legal types if any target ever does.
9764 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9765 // Create the masks - repeating the pattern every byte.
9766 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9767 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9768 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9769
9770 // BSWAP if the type is wider than a single byte.
9771 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9772
9773 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9774 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9775 Mask, EVL);
9776 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9777 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9778 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9779 Mask, EVL);
9780 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9781 Mask, EVL);
9782 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9783
9784 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9785 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9786 Mask, EVL);
9787 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9788 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9789 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9790 Mask, EVL);
9791 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9792 Mask, EVL);
9793 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9794
9795 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9796 Tmp2 = DAG.getNode(ISD::VP_SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9797 Mask, EVL);
9798 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9799 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9800 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9801 Mask, EVL);
9802 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9803 Mask, EVL);
9804 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9805 return Tmp;
9806 }
9807 return SDValue();
9808}
9809
9810std::pair<SDValue, SDValue>
9812 SelectionDAG &DAG) const {
9813 SDLoc SL(LD);
9814 SDValue Chain = LD->getChain();
9815 SDValue BasePTR = LD->getBasePtr();
9816 EVT SrcVT = LD->getMemoryVT();
9817 EVT DstVT = LD->getValueType(0);
9818 ISD::LoadExtType ExtType = LD->getExtensionType();
9819
9820 if (SrcVT.isScalableVector())
9821 report_fatal_error("Cannot scalarize scalable vector loads");
9822
9823 unsigned NumElem = SrcVT.getVectorNumElements();
9824
9825 EVT SrcEltVT = SrcVT.getScalarType();
9826 EVT DstEltVT = DstVT.getScalarType();
9827
9828 // A vector must always be stored in memory as-is, i.e. without any padding
9829 // between the elements, since various code depend on it, e.g. in the
9830 // handling of a bitcast of a vector type to int, which may be done with a
9831 // vector store followed by an integer load. A vector that does not have
9832 // elements that are byte-sized must therefore be stored as an integer
9833 // built out of the extracted vector elements.
9834 if (!SrcEltVT.isByteSized()) {
9835 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9836 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
9837
9838 unsigned NumSrcBits = SrcVT.getSizeInBits();
9839 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
9840
9841 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9842 SDValue SrcEltBitMask = DAG.getConstant(
9843 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
9844
9845 // Load the whole vector and avoid masking off the top bits as it makes
9846 // the codegen worse.
9847 SDValue Load =
9848 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
9849 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
9850 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9851
9853 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9854 unsigned ShiftIntoIdx =
9855 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9856 SDValue ShiftAmount = DAG.getShiftAmountConstant(
9857 ShiftIntoIdx * SrcEltVT.getSizeInBits(), LoadVT, SL);
9858 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
9859 SDValue Elt =
9860 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
9861 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
9862
9863 if (ExtType != ISD::NON_EXTLOAD) {
9864 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
9865 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
9866 }
9867
9868 Vals.push_back(Scalar);
9869 }
9870
9871 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9872 return std::make_pair(Value, Load.getValue(1));
9873 }
9874
9875 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
9876 assert(SrcEltVT.isByteSized());
9877
9879 SmallVector<SDValue, 8> LoadChains;
9880
9881 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9882 SDValue ScalarLoad =
9883 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
9884 LD->getPointerInfo().getWithOffset(Idx * Stride),
9885 SrcEltVT, LD->getOriginalAlign(),
9886 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9887
9888 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
9889
9890 Vals.push_back(ScalarLoad.getValue(0));
9891 LoadChains.push_back(ScalarLoad.getValue(1));
9892 }
9893
9894 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
9895 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9896
9897 return std::make_pair(Value, NewChain);
9898}
9899
9901 SelectionDAG &DAG) const {
9902 SDLoc SL(ST);
9903
9904 SDValue Chain = ST->getChain();
9905 SDValue BasePtr = ST->getBasePtr();
9906 SDValue Value = ST->getValue();
9907 EVT StVT = ST->getMemoryVT();
9908
9909 if (StVT.isScalableVector())
9910 report_fatal_error("Cannot scalarize scalable vector stores");
9911
9912 // The type of the data we want to save
9913 EVT RegVT = Value.getValueType();
9914 EVT RegSclVT = RegVT.getScalarType();
9915
9916 // The type of data as saved in memory.
9917 EVT MemSclVT = StVT.getScalarType();
9918
9919 unsigned NumElem = StVT.getVectorNumElements();
9920
9921 // A vector must always be stored in memory as-is, i.e. without any padding
9922 // between the elements, since various code depend on it, e.g. in the
9923 // handling of a bitcast of a vector type to int, which may be done with a
9924 // vector store followed by an integer load. A vector that does not have
9925 // elements that are byte-sized must therefore be stored as an integer
9926 // built out of the extracted vector elements.
9927 if (!MemSclVT.isByteSized()) {
9928 unsigned NumBits = StVT.getSizeInBits();
9929 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
9930
9931 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
9932
9933 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9934 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9935 DAG.getVectorIdxConstant(Idx, SL));
9936 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
9937 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
9938 unsigned ShiftIntoIdx =
9939 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9940 SDValue ShiftAmount =
9941 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
9942 SDValue ShiftedElt =
9943 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
9944 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
9945 }
9946
9947 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
9948 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9949 ST->getAAInfo());
9950 }
9951
9952 // Store Stride in bytes
9953 unsigned Stride = MemSclVT.getSizeInBits() / 8;
9954 assert(Stride && "Zero stride!");
9955 // Extract each of the elements from the original vector and save them into
9956 // memory individually.
9958 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9959 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9960 DAG.getVectorIdxConstant(Idx, SL));
9961
9962 SDValue Ptr =
9963 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
9964
9965 // This scalar TruncStore may be illegal, but we legalize it later.
9966 SDValue Store = DAG.getTruncStore(
9967 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
9968 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9969 ST->getAAInfo());
9970
9971 Stores.push_back(Store);
9972 }
9973
9974 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9975}
9976
9977std::pair<SDValue, SDValue>
9979 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9980 "unaligned indexed loads not implemented!");
9981 SDValue Chain = LD->getChain();
9982 SDValue Ptr = LD->getBasePtr();
9983 EVT VT = LD->getValueType(0);
9984 EVT LoadedVT = LD->getMemoryVT();
9985 SDLoc dl(LD);
9986 auto &MF = DAG.getMachineFunction();
9987
9988 if (VT.isFloatingPoint() || VT.isVector()) {
9989 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
9990 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
9991 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
9992 LoadedVT.isVector()) {
9993 // Scalarize the load and let the individual components be handled.
9994 return scalarizeVectorLoad(LD, DAG);
9995 }
9996
9997 // Expand to a (misaligned) integer load of the same size,
9998 // then bitconvert to floating point or vector.
9999 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
10000 LD->getMemOperand());
10001 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
10002 if (LoadedVT != VT)
10003 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
10004 ISD::ANY_EXTEND, dl, VT, Result);
10005
10006 return std::make_pair(Result, newLoad.getValue(1));
10007 }
10008
10009 // Copy the value to a (aligned) stack slot using (unaligned) integer
10010 // loads and stores, then do a (aligned) load from the stack slot.
10011 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
10012 unsigned LoadedBytes = LoadedVT.getStoreSize();
10013 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10014 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
10015
10016 // Make sure the stack slot is also aligned for the register type.
10017 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
10018 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
10020 SDValue StackPtr = StackBase;
10021 unsigned Offset = 0;
10022
10023 EVT PtrVT = Ptr.getValueType();
10024 EVT StackPtrVT = StackPtr.getValueType();
10025
10026 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10027 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10028
10029 // Do all but one copies using the full register width.
10030 for (unsigned i = 1; i < NumRegs; i++) {
10031 // Load one integer register's worth from the original location.
10032 SDValue Load = DAG.getLoad(
10033 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
10034 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10035 LD->getAAInfo());
10036 // Follow the load with a store to the stack slot. Remember the store.
10037 Stores.push_back(DAG.getStore(
10038 Load.getValue(1), dl, Load, StackPtr,
10039 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
10040 // Increment the pointers.
10041 Offset += RegBytes;
10042
10043 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10044 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10045 }
10046
10047 // The last copy may be partial. Do an extending load.
10048 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
10049 8 * (LoadedBytes - Offset));
10050 SDValue Load =
10051 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
10052 LD->getPointerInfo().getWithOffset(Offset), MemVT,
10053 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
10054 LD->getAAInfo());
10055 // Follow the load with a store to the stack slot. Remember the store.
10056 // On big-endian machines this requires a truncating store to ensure
10057 // that the bits end up in the right place.
10058 Stores.push_back(DAG.getTruncStore(
10059 Load.getValue(1), dl, Load, StackPtr,
10060 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
10061
10062 // The order of the stores doesn't matter - say it with a TokenFactor.
10063 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10064
10065 // Finally, perform the original load only redirected to the stack slot.
10066 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
10067 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
10068 LoadedVT);
10069
10070 // Callers expect a MERGE_VALUES node.
10071 return std::make_pair(Load, TF);
10072 }
10073
10074 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
10075 "Unaligned load of unsupported type.");
10076
10077 // Compute the new VT that is half the size of the old one. This is an
10078 // integer MVT.
10079 unsigned NumBits = LoadedVT.getSizeInBits();
10080 EVT NewLoadedVT;
10081 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
10082 NumBits >>= 1;
10083
10084 Align Alignment = LD->getOriginalAlign();
10085 unsigned IncrementSize = NumBits / 8;
10086 ISD::LoadExtType HiExtType = LD->getExtensionType();
10087
10088 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
10089 if (HiExtType == ISD::NON_EXTLOAD)
10090 HiExtType = ISD::ZEXTLOAD;
10091
10092 // Load the value in two parts
10093 SDValue Lo, Hi;
10094 if (DAG.getDataLayout().isLittleEndian()) {
10095 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10096 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10097 LD->getAAInfo());
10098
10099 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10100 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
10101 LD->getPointerInfo().getWithOffset(IncrementSize),
10102 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10103 LD->getAAInfo());
10104 } else {
10105 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
10106 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10107 LD->getAAInfo());
10108
10109 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10110 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
10111 LD->getPointerInfo().getWithOffset(IncrementSize),
10112 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
10113 LD->getAAInfo());
10114 }
10115
10116 // aggregate the two parts
10117 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
10118 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
10119 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
10120
10121 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
10122 Hi.getValue(1));
10123
10124 return std::make_pair(Result, TF);
10125}
10126
10128 SelectionDAG &DAG) const {
10129 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
10130 "unaligned indexed stores not implemented!");
10131 SDValue Chain = ST->getChain();
10132 SDValue Ptr = ST->getBasePtr();
10133 SDValue Val = ST->getValue();
10134 EVT VT = Val.getValueType();
10135 Align Alignment = ST->getOriginalAlign();
10136 auto &MF = DAG.getMachineFunction();
10137 EVT StoreMemVT = ST->getMemoryVT();
10138
10139 SDLoc dl(ST);
10140 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
10141 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
10142 if (isTypeLegal(intVT)) {
10143 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
10144 StoreMemVT.isVector()) {
10145 // Scalarize the store and let the individual components be handled.
10146 SDValue Result = scalarizeVectorStore(ST, DAG);
10147 return Result;
10148 }
10149 // Expand to a bitconvert of the value to the integer type of the
10150 // same size, then a (misaligned) int store.
10151 // FIXME: Does not handle truncating floating point stores!
10152 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
10153 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
10154 Alignment, ST->getMemOperand()->getFlags());
10155 return Result;
10156 }
10157 // Do a (aligned) store to a stack slot, then copy from the stack slot
10158 // to the final destination using (unaligned) integer loads and stores.
10159 MVT RegVT = getRegisterType(
10160 *DAG.getContext(),
10161 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
10162 EVT PtrVT = Ptr.getValueType();
10163 unsigned StoredBytes = StoreMemVT.getStoreSize();
10164 unsigned RegBytes = RegVT.getSizeInBits() / 8;
10165 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
10166
10167 // Make sure the stack slot is also aligned for the register type.
10168 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
10169 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10170
10171 // Perform the original store, only redirected to the stack slot.
10172 SDValue Store = DAG.getTruncStore(
10173 Chain, dl, Val, StackPtr,
10174 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
10175
10176 EVT StackPtrVT = StackPtr.getValueType();
10177
10178 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
10179 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
10181 unsigned Offset = 0;
10182
10183 // Do all but one copies using the full register width.
10184 for (unsigned i = 1; i < NumRegs; i++) {
10185 // Load one integer register's worth from the stack slot.
10186 SDValue Load = DAG.getLoad(
10187 RegVT, dl, Store, StackPtr,
10188 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
10189 // Store it to the final location. Remember the store.
10190 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
10191 ST->getPointerInfo().getWithOffset(Offset),
10192 ST->getOriginalAlign(),
10193 ST->getMemOperand()->getFlags()));
10194 // Increment the pointers.
10195 Offset += RegBytes;
10196 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
10197 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
10198 }
10199
10200 // The last store may be partial. Do a truncating store. On big-endian
10201 // machines this requires an extending load from the stack slot to ensure
10202 // that the bits are in the right place.
10203 EVT LoadMemVT =
10204 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
10205
10206 // Load from the stack slot.
10207 SDValue Load = DAG.getExtLoad(
10208 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
10209 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
10210
10211 Stores.push_back(
10212 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
10213 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
10214 ST->getOriginalAlign(),
10215 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
10216 // The order of the stores doesn't matter - say it with a TokenFactor.
10217 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
10218 return Result;
10219 }
10220
10221 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
10222 "Unaligned store of unknown type.");
10223 // Get the half-size VT
10224 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
10225 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
10226 unsigned IncrementSize = NumBits / 8;
10227
10228 // Divide the stored value in two parts.
10229 SDValue ShiftAmount =
10230 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
10231 SDValue Lo = Val;
10232 // If Val is a constant, replace the upper bits with 0. The SRL will constant
10233 // fold and not use the upper bits. A smaller constant may be easier to
10234 // materialize.
10235 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
10236 Lo = DAG.getNode(
10237 ISD::AND, dl, VT, Lo,
10238 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
10239 VT));
10240 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
10241
10242 // Store the two parts
10243 SDValue Store1, Store2;
10244 Store1 = DAG.getTruncStore(Chain, dl,
10245 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
10246 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
10247 ST->getMemOperand()->getFlags());
10248
10249 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
10250 Store2 = DAG.getTruncStore(
10251 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
10252 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
10253 ST->getMemOperand()->getFlags(), ST->getAAInfo());
10254
10255 SDValue Result =
10256 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
10257 return Result;
10258}
10259
10260SDValue
10262 const SDLoc &DL, EVT DataVT,
10263 SelectionDAG &DAG,
10264 bool IsCompressedMemory) const {
10265 SDValue Increment;
10266 EVT AddrVT = Addr.getValueType();
10267 EVT MaskVT = Mask.getValueType();
10268 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
10269 "Incompatible types of Data and Mask");
10270 if (IsCompressedMemory) {
10271 if (DataVT.isScalableVector())
10273 "Cannot currently handle compressed memory with scalable vectors");
10274 // Incrementing the pointer according to number of '1's in the mask.
10275 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
10276 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
10277 if (MaskIntVT.getSizeInBits() < 32) {
10278 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
10279 MaskIntVT = MVT::i32;
10280 }
10281
10282 // Count '1's with POPCNT.
10283 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
10284 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
10285 // Scale is an element size in bytes.
10286 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
10287 AddrVT);
10288 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
10289 } else if (DataVT.isScalableVector()) {
10290 Increment = DAG.getVScale(DL, AddrVT,
10291 APInt(AddrVT.getFixedSizeInBits(),
10292 DataVT.getStoreSize().getKnownMinValue()));
10293 } else
10294 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
10295
10296 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
10297}
10298
10300 EVT VecVT, const SDLoc &dl,
10301 ElementCount SubEC) {
10302 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
10303 "Cannot index a scalable vector within a fixed-width vector");
10304
10305 unsigned NElts = VecVT.getVectorMinNumElements();
10306 unsigned NumSubElts = SubEC.getKnownMinValue();
10307 EVT IdxVT = Idx.getValueType();
10308
10309 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
10310 // If this is a constant index and we know the value plus the number of the
10311 // elements in the subvector minus one is less than the minimum number of
10312 // elements then it's safe to return Idx.
10313 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
10314 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
10315 return Idx;
10316 SDValue VS =
10317 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
10318 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
10319 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
10320 DAG.getConstant(NumSubElts, dl, IdxVT));
10321 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
10322 }
10323 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
10324 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
10325 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
10326 DAG.getConstant(Imm, dl, IdxVT));
10327 }
10328 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
10329 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
10330 DAG.getConstant(MaxIndex, dl, IdxVT));
10331}
10332
10334 SDValue VecPtr, EVT VecVT,
10335 SDValue Index) const {
10336 return getVectorSubVecPointer(
10337 DAG, VecPtr, VecVT,
10339 Index);
10340}
10341
10343 SDValue VecPtr, EVT VecVT,
10344 EVT SubVecVT,
10345 SDValue Index) const {
10346 SDLoc dl(Index);
10347 // Make sure the index type is big enough to compute in.
10348 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10349
10350 EVT EltVT = VecVT.getVectorElementType();
10351
10352 // Calculate the element offset and add it to the pointer.
10353 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10354 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10355 "Converting bits to bytes lost precision");
10356 assert(SubVecVT.getVectorElementType() == EltVT &&
10357 "Sub-vector must be a vector with matching element type");
10358 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10359 SubVecVT.getVectorElementCount());
10360
10361 EVT IdxVT = Index.getValueType();
10362 if (SubVecVT.isScalableVector())
10363 Index =
10364 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10365 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10366
10367 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10368 DAG.getConstant(EltSize, dl, IdxVT));
10369 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10370}
10371
10372//===----------------------------------------------------------------------===//
10373// Implementation of Emulated TLS Model
10374//===----------------------------------------------------------------------===//
10375
// TargetLowering::LowerToTLSEmulatedModel (signature line 10376 elided in
// this excerpt): lowers an access to a thread-local global under the emulated
// TLS model by calling the runtime helper __emutls_get_address with the
// address of the companion control variable "__emutls_v.<name>". Returns the
// call's result value (the address of the TLS variable).
10377 SelectionDAG &DAG) const {
10378 // Access to address of TLS variable xyz is lowered to a function call:
10379 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10380 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10381 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10382 SDLoc dl(GA);
10383
10384 ArgListTy Args;
10385 ArgListEntry Entry;
// Look through casts/aliases so we find the control variable of the
// underlying global definition.
10386 const GlobalValue *GV =
10387 cast<GlobalValue>(GA->getGlobal()->stripPointerCastsAndAliases());
// Build the name of the companion control object emitted for this TLS global.
10388 SmallString<32> NameString("__emutls_v.");
10389 NameString += GV->getName();
10390 StringRef EmuTlsVarName(NameString);
10391 const GlobalVariable *EmuTlsVar =
10392 GV->getParent()->getNamedGlobal(EmuTlsVarName);
10393 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10394 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
10395 Entry.Ty = VoidPtrType;
10396 Args.push_back(Entry);
10397
10398 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10399
// NOTE(review): line 10400 is elided in this excerpt; it presumably declares
// the CallLoweringInfo `CLI` used below — confirm against upstream source.
10401 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10402 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10403 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10404
10405 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10406 // At least for X86 targets, maybe good for other targets too?
// NOTE(review): line 10407 is elided in this excerpt; it presumably fetches
// the MachineFrameInfo `MFI` mutated below — confirm against upstream source.
10408 MFI.setAdjustsStack(true); // Is this only for X86 target?
10409 MFI.setHasCalls(true);
10410
// The helper returns the base address only; a nonzero offset cannot be
// folded into the call, so the node must carry offset 0.
10411 assert((GA->getOffset() == 0) &&
10412 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10413 return CallResult.first;
10414}
10415
// TargetLowering::lowerCmpEqZeroToCtlzSrl (signature line 10416 elided in
// this excerpt): rewrites (setcc X, 0, eq) as (ctlz X) >> log2(bitwidth),
// which yields 1 iff X == 0, when the target reports CTLZ as fast.
// Returns an empty SDValue when the pattern does not apply.
10417 SelectionDAG &DAG) const {
10418 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10419 if (!isCtlzFast())
10420 return SDValue();
10421 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10422 SDLoc dl(Op);
// Only the equality-with-zero form is handled.
10423 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10424 EVT VT = Op.getOperand(0).getValueType();
10425 SDValue Zext = Op.getOperand(0);
// Widen sub-i32 operands so the CTLZ/SRL trick operates on at least i32.
10426 if (VT.bitsLT(MVT::i32)) {
10427 VT = MVT::i32;
10428 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10429 }
// ctlz(X) equals the bit width (1 << log2(BW)) only when X == 0, so shifting
// right by log2(BW) produces exactly the i1-style 0/1 result of X == 0.
10430 unsigned Log2b = Log2_32(VT.getSizeInBits());
10431 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10432 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10433 DAG.getConstant(Log2b, dl, MVT::i32));
10434 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10435 }
10436 return SDValue();
10437}
10438
// TargetLowering::expandIntMINMAX (signature line 10439 elided in this
// excerpt): expands [SU]MIN/[SU]MAX into cheaper legal forms — special-cased
// arithmetic identities when the needed ops are legal, otherwise a
// setcc+select pair, preferring condition codes for which a SETCC node
// already exists in the DAG so it can be reused.
10440 SDValue Op0 = Node->getOperand(0);
10441 SDValue Op1 = Node->getOperand(1);
10442 EVT VT = Op0.getValueType();
10443 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10444 unsigned Opcode = Node->getOpcode();
10445 SDLoc DL(Node);
10446
10447 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
// NOTE(review): line 10449, the continuation of this condition, is elided in
// this excerpt; per the comment above it presumably checks that boolean
// contents are all-ones — confirm against upstream source.
10448 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
// Freeze Op0: it is used twice below and must observe a consistent value.
10450 Op0 = DAG.getFreeze(Op0);
10451 SDValue Zero = DAG.getConstant(0, DL, VT);
10452 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10453 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10454 }
10455
10456 // umin(x,y) -> sub(x,usubsat(x,y))
10457 // TODO: Missing freeze(Op0)?
// NOTE(review): line 10459 (rest of the condition; presumably a USUBSAT
// legality check, given the node built below) is elided in this excerpt.
10458 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10460 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10461 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10462 }
10463
10464 // umax(x,y) -> add(x,usubsat(y,x))
10465 // TODO: Missing freeze(Op0)?
// NOTE(review): line 10467 (rest of the condition) is elided in this excerpt.
10466 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10468 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10469 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10470 }
10471
10472 // FIXME: Should really try to split the vector in case it's legal on a
10473 // subvector.
// NOTE(review): line 10474, the `if` guarding this vector unroll, is elided
// in this excerpt — confirm against upstream source.
10475 return DAG.UnrollVectorOp(Node);
10476
10477 // Attempt to find an existing SETCC node that we can reuse.
10478 // TODO: Do we need a generic doesSETCCNodeExist?
10479 // TODO: Missing freeze(Op0)/freeze(Op1)?
// Helper: tries each candidate condition code (direct forms first, then the
// commuted forms with swapped select operands) and reuses a matching SETCC
// already in the DAG; otherwise falls back to the preferred code.
10480 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10481 ISD::CondCode PrefCommuteCC,
10482 ISD::CondCode AltCommuteCC) {
10483 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10484 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10485 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10486 {Op0, Op1, DAG.getCondCode(CC)})) {
10487 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10488 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10489 }
10490 }
10491 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10492 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10493 {Op0, Op1, DAG.getCondCode(CC)})) {
10494 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10495 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10496 }
10497 }
10498 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10499 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10500 };
10501
10502 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10503 // -> Y = (A < B) ? B : A
10504 // -> Y = (A >= B) ? A : B
10505 // -> Y = (A <= B) ? B : A
10506 switch (Opcode) {
10507 case ISD::SMAX:
10508 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10509 case ISD::SMIN:
10510 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10511 case ISD::UMAX:
10512 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10513 case ISD::UMIN:
10514 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10515 }
10516
10517 llvm_unreachable("How did we get here?");
10518}
10519
// TargetLowering::expandAddSubSat (signature line 10520 elided in this
// excerpt): expands [SU]ADDSAT/[SU]SUBSAT. Fast paths use legal UMAX/UMIN
// identities; otherwise the node is lowered via the corresponding overflow
// op ([SU]ADDO/[SU]SUBO) plus a select (or mask) that clamps to the
// saturation value on overflow.
10521 unsigned Opcode = Node->getOpcode();
10522 SDValue LHS = Node->getOperand(0);
10523 SDValue RHS = Node->getOperand(1);
10524 EVT VT = LHS.getValueType();
10525 SDLoc dl(Node);
10526
10527 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10528 assert(VT.isInteger() && "Expected operands to be integers");
10529
10530 // usub.sat(a, b) -> umax(a, b) - b
10531 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10532 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10533 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10534 }
10535
10536 // uadd.sat(a, b) -> umin(a, ~b) + b
10537 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10538 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10539 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10540 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10541 }
10542
// Map the saturating opcode to its overflow-reporting counterpart.
10543 unsigned OverflowOp;
10544 switch (Opcode) {
10545 case ISD::SADDSAT:
10546 OverflowOp = ISD::SADDO;
10547 break;
10548 case ISD::UADDSAT:
10549 OverflowOp = ISD::UADDO;
10550 break;
10551 case ISD::SSUBSAT:
10552 OverflowOp = ISD::SSUBO;
10553 break;
10554 case ISD::USUBSAT:
10555 OverflowOp = ISD::USUBO;
10556 break;
10557 default:
10558 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10559 "addition or subtraction node.")
10560 }
10561
10562 // FIXME: Should really try to split the vector in case it's legal on a
10563 // subvector.
// NOTE(review): line 10564, the `if` guarding this vector unroll, is elided
// in this excerpt — confirm against upstream source.
10565 return DAG.UnrollVectorOp(Node);
10566
10567 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10568 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10569 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10570 SDValue SumDiff = Result.getValue(0);
10571 SDValue Overflow = Result.getValue(1);
10572 SDValue Zero = DAG.getConstant(0, dl, VT);
10573 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10574
10575 if (Opcode == ISD::UADDSAT) {
// NOTE(review): line 10576, the `if` opening this branch (presumably a
// boolean-contents check enabling the mask trick), is elided in this excerpt.
10577 // (LHS + RHS) | OverflowMask
10578 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10579 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10580 }
10581 // Overflow ? 0xffff.... : (LHS + RHS)
10582 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10583 }
10584
10585 if (Opcode == ISD::USUBSAT) {
// NOTE(review): line 10586, the `if` opening this branch, is elided in this
// excerpt — confirm against upstream source.
10587 // (LHS - RHS) & ~OverflowMask
10588 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10589 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10590 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10591 }
10592 // Overflow ? 0 : (LHS - RHS)
10593 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10594 }
10595
10596 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
// NOTE(review): lines 10597-10598, presumably defining the signed
// MinVal/MaxVal APInts used below, are elided in this excerpt.
10599
10600 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10601 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10602
10603 // If either of the operand signs are known, then they are guaranteed to
10604 // only saturate in one direction. If non-negative they will saturate
10605 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10606 //
10607 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10608 // sign of 'y' has to be flipped.
10609
10610 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10611 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10612 : KnownRHS.isNegative();
10613 if (LHSIsNonNegative || RHSIsNonNegative) {
10614 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10615 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10616 }
10617
10618 bool LHSIsNegative = KnownLHS.isNegative();
10619 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10620 : KnownRHS.isNonNegative();
10621 if (LHSIsNegative || RHSIsNegative) {
10622 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10623 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10624 }
10625 }
10626
10627 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
// NOTE(review): line 10628, presumably defining `MinVal` for this general
// signed path, is elided in this excerpt — confirm against upstream source.
10629 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
// The sign of the true result picks the saturation direction: arithmetic
// shift replicates the sign bit, and XOR with MinVal maps it to MIN or MAX.
10630 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10631 DAG.getConstant(BitWidth - 1, dl, VT));
10632 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10633 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10634}
10635
// TargetLowering::expandCMP (signature line 10636 elided in this excerpt):
// expands UCMP/SCMP (three-way compare returning -1/0/+1) either as two
// nested selects or, when boolean arithmetic is viable, as the subtraction
// of the two comparison results.
10637 unsigned Opcode = Node->getOpcode();
10638 SDValue LHS = Node->getOperand(0);
10639 SDValue RHS = Node->getOperand(1);
10640 EVT VT = LHS.getValueType();
10641 EVT ResVT = Node->getValueType(0);
10642 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10643 SDLoc dl(Node);
10644
// UCMP uses unsigned predicates, SCMP signed ones.
10645 auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
10646 auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
10647 SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
10648 SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
10649
10650 // We can't perform arithmetic on i1 values. Extending them would
10651 // probably result in worse codegen, so let's just use two selects instead.
10652 // Some targets are also just better off using selects rather than subtraction
10653 // because one of the conditions can be merged with one of the selects.
10654 // And finally, if we don't know the contents of high bits of a boolean value
10655 // we can't perform any arithmetic either.
// NOTE(review): line 10657, the continuation of this condition (presumably
// the boolean-contents check described above), is elided in this excerpt.
10656 if (shouldExpandCmpUsingSelects() || BoolVT.getScalarSizeInBits() == 1 ||
// IsGT ? 1 : (IsLT ? -1 : 0), built inside-out as two selects.
10658 SDValue SelectZeroOrOne =
10659 DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
10660 DAG.getConstant(0, dl, ResVT));
10661 return DAG.getSelect(dl, ResVT, IsLT, DAG.getAllOnesConstant(dl, ResVT),
10662 SelectZeroOrOne);
10663 }
10664
// NOTE(review): line 10665, the condition guarding this swap (presumably
// dependent on the target's boolean representation), is elided in this
// excerpt — confirm against upstream source.
10666 std::swap(IsGT, IsLT);
// Result is the difference of the two boolean comparison values, resized to
// the node's result type.
10667 return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
10668 ResVT);
10669}
10670
// TargetLowering::expandShlSat (signature line 10671 elided in this
// excerpt): expands SSHLSAT/USHLSAT. Performs the shift, then detects
// overflow by shifting back and comparing with the original operand; on
// overflow the result is clamped to the appropriate saturation constant.
10672 unsigned Opcode = Node->getOpcode();
10673 bool IsSigned = Opcode == ISD::SSHLSAT;
10674 SDValue LHS = Node->getOperand(0);
10675 SDValue RHS = Node->getOperand(1);
10676 EVT VT = LHS.getValueType();
10677 SDLoc dl(Node);
10678
10679 assert((Node->getOpcode() == ISD::SSHLSAT ||
10680 Node->getOpcode() == ISD::USHLSAT) &&
10681 "Expected a SHLSAT opcode");
10682 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10683 assert(VT.isInteger() && "Expected operands to be integers");
10684
// NOTE(review): line 10685, the `if` guarding this vector unroll, is elided
// in this excerpt — confirm against upstream source.
10686 return DAG.UnrollVectorOp(Node);
10687
10688 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10689
10690 unsigned BW = VT.getScalarSizeInBits();
10691 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10692 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
// Round-trip shift: arithmetic for signed so the sign bit is preserved.
10693 SDValue Orig =
10694 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10695
10696 SDValue SatVal;
10697 if (IsSigned) {
// Signed saturation direction follows the sign of the original operand.
10698 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10699 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10700 SDValue Cond =
10701 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10702 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10703 } else {
10704 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10705 }
10706 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10707 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10708}
10709
// TargetLowering::forceExpandWideMUL, halves overload (signature line 10710
// elided in this excerpt): computes the full double-width product of two
// values given as low/high halves (LL:LH * RL:RH), writing the low and high
// halves of the product to Lo/Hi. Uses a MUL libcall when one exists for
// WideVT, otherwise expands by schoolbook half-word multiplication.
10711 bool Signed, EVT WideVT,
10712 const SDValue LL, const SDValue LH,
10713 const SDValue RL, const SDValue RH,
10714 SDValue &Lo, SDValue &Hi) const {
10715 // We can fall back to a libcall with an illegal type for the MUL if we
10716 // have a libcall big enough.
10717 // Also, we can fall back to a division in some cases, but that's a big
10718 // performance hit in the general case.
10719 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10720 if (WideVT == MVT::i16)
10721 LC = RTLIB::MUL_I16;
10722 else if (WideVT == MVT::i32)
10723 LC = RTLIB::MUL_I32;
10724 else if (WideVT == MVT::i64)
10725 LC = RTLIB::MUL_I64;
10726 else if (WideVT == MVT::i128)
10727 LC = RTLIB::MUL_I128;
10728
10729 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10730 // We'll expand the multiplication by brute force because we have no other
10731 // options. This is a trivially-generalized version of the code from
10732 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10733 // 4.3.1).
10734 EVT VT = LL.getValueType();
10735 unsigned Bits = VT.getSizeInBits();
10736 unsigned HalfBits = Bits >> 1;
// Split each low operand into half-width pieces (suffix L = low half,
// H = high half) so partial products never overflow VT.
10737 SDValue Mask =
10738 DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10739 SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10740 SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10741
10742 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10743 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10744
10745 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10746 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10747 SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10748 SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10749
// Accumulate the cross partial products with carries propagated via the
// intermediate U/V/W sums.
10750 SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10751 DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10752 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10753 SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10754
10755 SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10756 DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10757 SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10758
10759 SDValue W =
10760 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10761 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10762 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10763 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10764
// High half also folds in the contributions of the operands' high words.
10765 Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10766 DAG.getNode(ISD::ADD, dl, VT,
10767 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10768 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10769 } else {
10770 // Attempt a libcall.
10771 SDValue Ret;
// NOTE(review): line 10772 is elided in this excerpt; it presumably declares
// the `CallOptions` object configured below — confirm against upstream.
10773 CallOptions.setSExt(Signed);
10774 CallOptions.setIsPostTypeLegalization(true);
10775 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10776 // Halves of WideVT are packed into registers in different order
10777 // depending on platform endianness. This is usually handled by
10778 // the C calling convention, but we can't defer to it in
10779 // the legalizer.
10780 SDValue Args[] = {LL, LH, RL, RH};
10781 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10782 } else {
10783 SDValue Args[] = {LH, LL, RH, RL};
10784 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10785 }
10786 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10787 "Ret value is a collection of constituent nodes holding result.");
10788 if (DAG.getDataLayout().isLittleEndian()) {
10789 // Same as above.
10790 Lo = Ret.getOperand(0);
10791 Hi = Ret.getOperand(1);
10792 } else {
10793 Lo = Ret.getOperand(1);
10794 Hi = Ret.getOperand(0);
10795 }
10796 }
10797}
10798
// TargetLowering::forceExpandWideMUL, two-operand overload (signature line
// 10799 elided in this excerpt): computes the full 2*N-bit product of two
// N-bit operands by synthesizing the high halves (sign-replication for
// signed, zero for unsigned) and delegating to the halves overload above.
10800 bool Signed, const SDValue LHS,
10801 const SDValue RHS, SDValue &Lo,
10802 SDValue &Hi) const {
10803 EVT VT = LHS.getValueType();
10804 assert(RHS.getValueType() == VT && "Mismatching operand types");
10805
10806 SDValue HiLHS;
10807 SDValue HiRHS;
10808 if (Signed) {
10809 // The high part is obtained by SRA'ing all but one of the bits of low
10810 // part.
10811 unsigned LoSize = VT.getFixedSizeInBits();
10812 HiLHS = DAG.getNode(
10813 ISD::SRA, dl, VT, LHS,
10814 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10815 HiRHS = DAG.getNode(
10816 ISD::SRA, dl, VT, RHS,
10817 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10818 } else {
10819 HiLHS = DAG.getConstant(0, dl, VT);
10820 HiRHS = DAG.getConstant(0, dl, VT);
10821 }
10822 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10823 forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10824}
10825
// TargetLowering::expandFixedPointMul (signature continuation, line 10827,
// elided in this excerpt): expands [US]MULFIX[SAT] — fixed-point multiply of
// two operands scaled by 2^Scale. Computes the double-width product (via
// MUL_LOHI, MULH*, a widened MUL, or brute-force expansion), shifts it right
// by Scale, and for the SAT variants clamps on overflow.
10826SDValue
10828 assert((Node->getOpcode() == ISD::SMULFIX ||
10829 Node->getOpcode() == ISD::UMULFIX ||
10830 Node->getOpcode() == ISD::SMULFIXSAT ||
10831 Node->getOpcode() == ISD::UMULFIXSAT) &&
10832 "Expected a fixed point multiplication opcode");
10833
10834 SDLoc dl(Node);
10835 SDValue LHS = Node->getOperand(0);
10836 SDValue RHS = Node->getOperand(1);
10837 EVT VT = LHS.getValueType();
10838 unsigned Scale = Node->getConstantOperandVal(2);
10839 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10840 Node->getOpcode() == ISD::UMULFIXSAT);
10841 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10842 Node->getOpcode() == ISD::SMULFIXSAT);
10843 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10844 unsigned VTSize = VT.getScalarSizeInBits();
10845
10846 if (!Scale) {
10847 // [us]mul.fix(a, b, 0) -> mul(a, b)
// NOTE(review): line 10849, the condition guarding this plain-MUL path
// (presumably a MUL legality check), is elided in this excerpt.
10848 if (!Saturating) {
10850 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10851 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
// Scale==0 saturating signed multiply via SMULO: on overflow, saturate in
// the direction given by the sign of the (conceptual) wide product.
10852 SDValue Result =
10853 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10854 SDValue Product = Result.getValue(0);
10855 SDValue Overflow = Result.getValue(1);
10856 SDValue Zero = DAG.getConstant(0, dl, VT);
10857
10858 APInt MinVal = APInt::getSignedMinValue(VTSize);
10859 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
10860 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10861 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10862 // Xor the inputs, if resulting sign bit is 0 the product will be
10863 // positive, else negative.
10864 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10865 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
10866 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
10867 return DAG.getSelect(dl, VT, Overflow, Result, Product);
10868 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
// Unsigned counterpart: any overflow saturates to the unsigned maximum.
10869 SDValue Result =
10870 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10871 SDValue Product = Result.getValue(0);
10872 SDValue Overflow = Result.getValue(1);
10873
10874 APInt MaxVal = APInt::getMaxValue(VTSize);
10875 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10876 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
10877 }
10878 }
10879
10880 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10881 "Expected scale to be less than the number of bits if signed or at "
10882 "most the number of bits if unsigned.");
10883 assert(LHS.getValueType() == RHS.getValueType() &&
10884 "Expected both operands to be the same type");
10885
10886 // Get the upper and lower bits of the result.
10887 SDValue Lo, Hi;
10888 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10889 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10890 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
10891 if (VT.isVector())
// NOTE(review): line 10893, the continuation building the wide vector type,
// is elided in this excerpt — confirm against upstream source.
10892 WideVT =
10894 if (isOperationLegalOrCustom(LoHiOp, VT)) {
10895 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
10896 Lo = Result.getValue(0);
10897 Hi = Result.getValue(1);
10898 } else if (isOperationLegalOrCustom(HiOp, VT)) {
10899 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10900 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
10901 } else if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
10902 // Try for a multiplication using a wider type.
10903 unsigned Ext = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10904 SDValue LHSExt = DAG.getNode(Ext, dl, WideVT, LHS);
10905 SDValue RHSExt = DAG.getNode(Ext, dl, WideVT, RHS);
10906 SDValue Res = DAG.getNode(ISD::MUL, dl, WideVT, LHSExt, RHSExt);
10907 Lo = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
10908 SDValue Shifted =
10909 DAG.getNode(ISD::SRA, dl, WideVT, Res,
10910 DAG.getShiftAmountConstant(VTSize, WideVT, dl));
10911 Hi = DAG.getNode(ISD::TRUNCATE, dl, VT, Shifted);
10912 } else if (VT.isVector()) {
10913 return SDValue();
10914 } else {
10915 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10916 }
10917
10918 if (Scale == VTSize)
10919 // Result is just the top half since we'd be shifting by the width of the
10920 // operand. Overflow impossible so this works for both UMULFIX and
10921 // UMULFIXSAT.
10922 return Hi;
10923
10924 // The result will need to be shifted right by the scale since both operands
10925 // are scaled. The result is given to us in 2 halves, so we only want part of
10926 // both in the result.
10927 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
10928 DAG.getShiftAmountConstant(Scale, VT, dl));
10929 if (!Saturating)
10930 return Result;
10931
10932 if (!Signed) {
10933 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10934 // widened multiplication) aren't all zeroes.
10935
10936 // Saturate to max if ((Hi >> Scale) != 0),
10937 // which is the same as if (Hi > ((1 << Scale) - 1))
10938 APInt MaxVal = APInt::getMaxValue(VTSize);
10939 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
10940 dl, VT);
10941 Result = DAG.getSelectCC(dl, Hi, LowMask,
10942 DAG.getConstant(MaxVal, dl, VT), Result,
10943 ISD::SETUGT);
10944
10945 return Result;
10946 }
10947
10948 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10949 // widened multiplication) aren't all ones or all zeroes.
10950
10951 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
10952 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
10953
10954 if (Scale == 0) {
// With no scale, overflow means Hi is not just the sign-extension of Lo.
10955 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
10956 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
10957 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
10958 // Saturated to SatMin if wide product is negative, and SatMax if wide
10959 // product is positive ...
10960 SDValue Zero = DAG.getConstant(0, dl, VT);
10961 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
10962 ISD::SETLT);
10963 // ... but only if we overflowed.
10964 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
10965 }
10966
10967 // We handled Scale==0 above so all the bits to examine are in Hi.
10968
10969 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
10970 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10971 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
10972 dl, VT);
10973 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
10974 // Saturate to min if (Hi >> (Scale - 1)) < -1),
10975 // which is the same as if (HI < (-1 << (Scale - 1))
10976 SDValue HighMask =
10977 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
10978 dl, VT);
10979 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
10980 return Result;
10981}
10982
// TargetLowering::expandFixedPointDiv (signature continuation, line 10984,
// elided in this excerpt): expands [US]DIVFIX[SAT] — fixed-point division
// with scale 2^Scale — when the operands have enough headroom to pre-shift
// LHS up / RHS down and perform an ordinary division in the same type.
// Returns an empty SDValue if there is not enough headroom.
10983SDValue
10985 SDValue LHS, SDValue RHS,
10986 unsigned Scale, SelectionDAG &DAG) const {
10987 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
10988 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
10989 "Expected a fixed point division opcode");
10990
10991 EVT VT = LHS.getValueType();
10992 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
10993 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
10994 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10995
10996 // If there is enough room in the type to upscale the LHS or downscale the
10997 // RHS before the division, we can perform it in this type without having to
10998 // resize. For signed operations, the LHS headroom is the number of
10999 // redundant sign bits, and for unsigned ones it is the number of zeroes.
11000 // The headroom for the RHS is the number of trailing zeroes.
// NOTE(review): line 11002, the unsigned arm of this conditional expression
// (presumably counting leading zeroes of LHS), is elided in this excerpt.
11001 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
11003 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11004
11005 // For signed saturating operations, we need to be able to detect true integer
11006 // division overflow; that is, when you have MIN / -EPS. However, this
11007 // is undefined behavior and if we emit divisions that could take such
11008 // values it may cause undesired behavior (arithmetic exceptions on x86, for
11009 // example).
11010 // Avoid this by requiring an extra bit so that we never get this case.
11011 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
11012 // signed saturating division, we need to emit a whopping 32-bit division.
11013 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
11014 return SDValue();
11015
// Prefer shifting the LHS up; make up the remainder on the RHS.
11016 unsigned LHSShift = std::min(LHSLead, Scale);
11017 unsigned RHSShift = Scale - LHSShift;
11018
11019 // At this point, we know that if we shift the LHS up by LHSShift and the
11020 // RHS down by RHSShift, we can emit a regular division with a final scaling
11021 // factor of Scale.
11022
11023 if (LHSShift)
11024 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
11025 DAG.getShiftAmountConstant(LHSShift, VT, dl));
11026 if (RHSShift)
11027 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
11028 DAG.getShiftAmountConstant(RHSShift, VT, dl));
11029
11030 SDValue Quot;
11031 if (Signed) {
11032 // For signed operations, if the resulting quotient is negative and the
11033 // remainder is nonzero, subtract 1 from the quotient to round towards
11034 // negative infinity.
11035 SDValue Rem;
11036 // FIXME: Ideally we would always produce an SDIVREM here, but if the
11037 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
11038 // we couldn't just form a libcall, but the type legalizer doesn't do it.
// NOTE(review): line 11040, the continuation of this condition (presumably
// an SDIVREM legality check), is elided in this excerpt.
11039 if (isTypeLegal(VT) &&
11041 Quot = DAG.getNode(ISD::SDIVREM, dl,
11042 DAG.getVTList(VT, VT),
11043 LHS, RHS);
11044 Rem = Quot.getValue(1);
11045 Quot = Quot.getValue(0);
11046 } else {
11047 Quot = DAG.getNode(ISD::SDIV, dl, VT,
11048 LHS, RHS);
11049 Rem = DAG.getNode(ISD::SREM, dl, VT,
11050 LHS, RHS);
11051 }
// Quotient is negative iff the operand signs differ; adjust by -1 when the
// division was inexact, implementing floor rounding.
11052 SDValue Zero = DAG.getConstant(0, dl, VT);
11053 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
11054 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
11055 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
11056 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
11057 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
11058 DAG.getConstant(1, dl, VT));
11059 Quot = DAG.getSelect(dl, VT,
11060 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
11061 Sub1, Quot);
11062 } else
11063 Quot = DAG.getNode(ISD::UDIV, dl, VT,
11064 LHS, RHS);
11065
11066 return Quot;
11067}
11068
// TargetLowering::expandUADDSUBO (signature opening, line 11069, elided in
// this excerpt): expands UADDO/USUBO into the plain ADD/SUB plus a SETCC
// computing the overflow flag, preferring the carry-flavored
// UADDO_CARRY/USUBO_CARRY opcodes when the target supports them.
11070 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11071 SDLoc dl(Node);
11072 SDValue LHS = Node->getOperand(0);
11073 SDValue RHS = Node->getOperand(1);
11074 bool IsAdd = Node->getOpcode() == ISD::UADDO;
11075
11076 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
11077 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
11078 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
// Carry-in is zero, so the carry op computes exactly the non-carry result.
11079 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
11080 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
11081 { LHS, RHS, CarryIn });
11082 Result = SDValue(NodeCarry.getNode(), 0);
11083 Overflow = SDValue(NodeCarry.getNode(), 1);
11084 return;
11085 }
11086
11087 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11088 LHS.getValueType(), LHS, RHS);
11089
11090 EVT ResultType = Node->getValueType(1);
11091 EVT SetCCType = getSetCCResultType(
11092 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11093 SDValue SetCC;
11094 if (IsAdd && isOneConstant(RHS)) {
11095 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially reduces
11096 // the live range of X. We assume comparing with 0 is cheap.
11097 // The general case (X + C) < C is not necessarily beneficial. Although we
11098 // reduce the live range of X, we may introduce the materialization of
11099 // constant C.
11100 SetCC =
11101 DAG.getSetCC(dl, SetCCType, Result,
11102 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
11103 } else if (IsAdd && isAllOnesConstant(RHS)) {
11104 // Special case: uaddo X, -1 overflows if X != 0.
11105 SetCC =
11106 DAG.getSetCC(dl, SetCCType, LHS,
11107 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
11108 } else {
// NOTE(review): line 11109, presumably choosing the condition code `CC`
// used below (unsigned compare of Result vs LHS), is elided in this excerpt.
11110 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
11111 }
11112 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11113}
11114
// TargetLowering::expandSADDSUBO (signature opening, line 11115, elided in
// this excerpt): expands SADDO/SSUBO into the plain ADD/SUB plus an overflow
// flag. If the matching saturating op is legal, overflow is simply "result
// differs from the saturated result"; otherwise it is derived from the sign
// relationship between the result, LHS, and RHS.
11116 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
11117 SDLoc dl(Node);
11118 SDValue LHS = Node->getOperand(0);
11119 SDValue RHS = Node->getOperand(1);
11120 bool IsAdd = Node->getOpcode() == ISD::SADDO;
11121
11122 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
11123 LHS.getValueType(), LHS, RHS);
11124
11125 EVT ResultType = Node->getValueType(1);
11126 EVT OType = getSetCCResultType(
11127 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
11128
11129 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
11130 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
11131 if (isOperationLegal(OpcSat, LHS.getValueType())) {
11132 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
11133 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
11134 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
11135 return;
11136 }
11137
11138 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
11139
11140 // For an addition, the result should be less than one of the operands (LHS)
11141 // if and only if the other operand (RHS) is negative, otherwise there will
11142 // be overflow.
11143 // For a subtraction, the result should be less than one of the operands
11144 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
11145 // otherwise there will be overflow.
11146 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
11147 SDValue ConditionRHS =
11148 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
11149
// Overflow occurred exactly when the two conditions disagree (XOR).
11150 Overflow = DAG.getBoolExtOrTrunc(
11151 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
11152 ResultType, ResultType);
11153}
11154
// TargetLowering::expandMULO (signature opening, line 11155, elided in this
// excerpt): expands SMULO/UMULO. Power-of-two multipliers become a shift
// with a shift-back overflow check; otherwise the top half of the product is
// obtained via MULH*, MUL_LOHI, a widened MUL, or brute-force expansion, and
// overflow is "top half inconsistent with the bottom half". Returns false
// only when no scalar fallback exists (unsupported vector case).
11156 SDValue &Overflow, SelectionDAG &DAG) const {
11157 SDLoc dl(Node);
11158 EVT VT = Node->getValueType(0);
11159 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
11160 SDValue LHS = Node->getOperand(0);
11161 SDValue RHS = Node->getOperand(1);
11162 bool isSigned = Node->getOpcode() == ISD::SMULO;
11163
11164 // For power-of-two multiplications we can use a simpler shift expansion.
11165 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
11166 const APInt &C = RHSC->getAPIntValue();
11167 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
11168 if (C.isPowerOf2()) {
11169 // smulo(x, signed_min) is same as umulo(x, signed_min).
11170 bool UseArithShift = isSigned && !C.isMinSignedValue();
11171 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
11172 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
11173 Overflow = DAG.getSetCC(dl, SetCCVT,
11174 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
11175 dl, VT, Result, ShiftAmt),
11176 LHS, ISD::SETNE);
11177 return true;
11178 }
11179 }
11180
11181 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
11182 if (VT.isVector())
// NOTE(review): line 11184, the continuation building the wide vector type,
// is elided in this excerpt — confirm against upstream source.
11183 WideVT =
11185
11186 SDValue BottomHalf;
11187 SDValue TopHalf;
// NOTE(review): lines 11189-11190, the initializer of `Ops` (per its uses
// below: [0]=high-part mul, [1]=MUL_LOHI, [2]=extend opcode, indexed by
// signedness), are elided in this excerpt — confirm against upstream source.
11188 static const unsigned Ops[2][3] =
11191 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
11192 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
11193 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
11194 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
11195 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
11196 RHS);
11197 TopHalf = BottomHalf.getValue(1);
11198 } else if (isTypeLegal(WideVT)) {
// Widen, multiply, and split the double-width product into halves.
11199 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
11200 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
11201 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
11202 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
11203 SDValue ShiftAmt =
11204 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
11205 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
11206 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
11207 } else {
11208 if (VT.isVector())
11209 return false;
11210
11211 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
11212 }
11213
11214 Result = BottomHalf;
11215 if (isSigned) {
// Signed overflow: top half must equal the sign-extension of the bottom half.
11216 SDValue ShiftAmt = DAG.getShiftAmountConstant(
11217 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
11218 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
11219 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
11220 } else {
// Unsigned overflow: any nonzero top half.
11221 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
11222 DAG.getConstant(0, dl, VT), ISD::SETNE);
11223 }
11224
11225 // Truncate the result if SetCC returns a larger type than needed.
11226 EVT RType = Node->getValueType(1);
11227 if (RType.bitsLT(Overflow.getValueType()))
11228 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
11229
11230 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
11231 "Unexpected result type for S/UMULO legalization");
11232 return true;
11233}
11234
// Expand a VECREDUCE_* node into a log2 tree of vector halvings (where the
// base opcode is legal on the half-width type) followed by a linear chain
// of scalar operations over the extracted elements.
// NOTE(review): the signature line (line 11235, presumably
// "SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG)
// const {") is missing from this extraction — confirm against the original.
11236 SDLoc dl(Node);
// The scalar/binary opcode corresponding to this reduction (ADD for
// VECREDUCE_ADD, etc.).
11237 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11238 SDValue Op = Node->getOperand(0);
11239 EVT VT = Op.getValueType();
11240
// Element extraction below requires a fixed element count.
11241 if (VT.isScalableVector())
// NOTE(review): line 11242 (presumably the report_fatal_error( call this
// message string belongs to) is missing from this extraction.
11243 "Expanding reductions for scalable vectors is undefined.");
11244
11245 // Try to use a shuffle reduction for power of two vectors.
11246 if (VT.isPow2VectorType()) {
11247 while (VT.getVectorNumElements() > 1) {
11248 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
11249 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
11250 break;
11251
// Combine the two halves pairwise; each iteration halves the width.
11252 SDValue Lo, Hi;
11253 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
11254 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
11255 VT = HalfVT;
11256 }
11257 }
11258
11259 EVT EltVT = VT.getVectorElementType();
11260 unsigned NumElts = VT.getVectorNumElements();
11261
// NOTE(review): line 11262 (presumably the SmallVector<SDValue, N> Ops;
// declaration) is missing from this extraction.
11263 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
11264
// Fold the remaining elements left-to-right with the base opcode.
11265 SDValue Res = Ops[0];
11266 for (unsigned i = 1; i < NumElts; i++)
11267 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
11268
11269 // Result type may be wider than element type.
11270 if (EltVT != Node->getValueType(0))
11271 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
11272 return Res;
11273}
11274
// Expand a sequential (ordered) VECREDUCE_SEQ_* node: fold the accumulator
// operand through every vector element strictly in order — no tree
// reassociation, so FP semantics are preserved.
// NOTE(review): the signature line (line 11275, presumably
// "SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG
// &DAG) const {") is missing from this extraction — confirm.
11276 SDLoc dl(Node);
// Operand 0 is the starting accumulator; operand 1 is the vector.
11277 SDValue AccOp = Node->getOperand(0);
11278 SDValue VecOp = Node->getOperand(1);
11279 SDNodeFlags Flags = Node->getFlags();
11280
11281 EVT VT = VecOp.getValueType();
11282 EVT EltVT = VT.getVectorElementType();
11283
// Element extraction below requires a fixed element count.
11284 if (VT.isScalableVector())
// NOTE(review): line 11285 (presumably the report_fatal_error( call this
// message string belongs to) is missing from this extraction.
11286 "Expanding reductions for scalable vectors is undefined.");
11287
11288 unsigned NumElts = VT.getVectorNumElements();
11289
// NOTE(review): line 11290 (presumably the SmallVector<SDValue, N> Ops;
// declaration) is missing from this extraction.
11291 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
11292
11293 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
11294
// Strictly left-to-right fold starting from the accumulator.
11295 SDValue Res = AccOp;
11296 for (unsigned i = 0; i < NumElts; i++)
11297 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
11298
11299 return Res;
11300}
11301
// Expand an [SU]REM node: prefer a combined [SU]DIVREM if legal (taking its
// remainder result), otherwise synthesize the remainder from a legal
// division as X - (X/Y)*Y. Returns false if neither form is available.
// NOTE(review): the first signature line (line 11302, presumably
// "bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,") is
// missing from this extraction — confirm against the original file.
11303 SelectionDAG &DAG) const {
11304 EVT VT = Node->getValueType(0);
11305 SDLoc dl(Node);
11306 bool isSigned = Node->getOpcode() == ISD::SREM;
11307 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
11308 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
11309 SDValue Dividend = Node->getOperand(0);
11310 SDValue Divisor = Node->getOperand(1);
11311 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
// DIVREM produces {quotient, remainder}; we want value #1.
11312 SDVTList VTs = DAG.getVTList(VT, VT);
11313 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
11314 return true;
11315 }
11316 if (isOperationLegalOrCustom(DivOpc, VT)) {
11317 // X % Y -> X-X/Y*Y
11318 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
11319 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
11320 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
11321 return true;
11322 }
11323 return false;
11324}
11325
// Expand FP_TO_SINT_SAT / FP_TO_UINT_SAT: a float-to-int conversion that
// saturates to the min/max of a SatWidth-bit integer instead of producing
// undefined results on out-of-range input, and maps NaN to zero.
// NOTE(review): the first signature line (line 11326, presumably
// "SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,") is missing
// from this extraction — confirm against the original file.
11327 SelectionDAG &DAG) const {
11328 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
11329 SDLoc dl(SDValue(Node, 0));
11330 SDValue Src = Node->getOperand(0);
11331
11332 // DstVT is the result type, while SatVT is the size to which we saturate
11333 EVT SrcVT = Src.getValueType();
11334 EVT DstVT = Node->getValueType(0);
11335
// Operand 1 is a VTSDNode carrying the saturation width.
11336 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
11337 unsigned SatWidth = SatVT.getScalarSizeInBits();
11338 unsigned DstWidth = DstVT.getScalarSizeInBits();
11339 assert(SatWidth <= DstWidth &&
11340 "Expected saturation width smaller than result width");
11341
11342 // Determine minimum and maximum integer values and their corresponding
11343 // floating-point values.
11344 APInt MinInt, MaxInt;
11345 if (IsSigned) {
11346 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
11347 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
11348 } else {
11349 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
11350 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
11351 }
11352
11353 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
11354 // libcall emission cannot handle this. Large result types will fail.
11355 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
11356 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
11357 SrcVT = Src.getValueType();
11358 }
11359
// Convert the integer bounds into source-type floats, rounding toward
// zero so the float bounds never lie outside the integer range.
11360 const fltSemantics &Sem = SrcVT.getFltSemantics();
11361 APFloat MinFloat(Sem);
11362 APFloat MaxFloat(Sem);
11363
11364 APFloat::opStatus MinStatus =
11365 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
11366 APFloat::opStatus MaxStatus =
11367 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
11368 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
11369 !(MaxStatus & APFloat::opStatus::opInexact);
11370
11371 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
11372 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
11373
11374 // If the integer bounds are exactly representable as floats and min/max are
11375 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
11376 // of comparisons and selects.
11377 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
// NOTE(review): line 11378 (presumably the FMAXNUM legality half of the
// conjunction) is missing from this extraction.
11379 if (AreExactFloatBounds && MinMaxLegal) {
11380 SDValue Clamped = Src;
11381
11382 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11383 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11384 // Clamp by MaxFloat from above. NaN cannot occur.
11385 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11386 // Convert clamped value to integer.
11387 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
11388 dl, DstVT, Clamped);
11389
11390 // In the unsigned case we're done, because we mapped NaN to MinFloat,
11391 // which will cast to zero.
11392 if (!IsSigned)
11393 return FpToInt;
11394
11395 // Otherwise, select 0 if Src is NaN.
// Src != Src (unordered-compare with itself) is the NaN test.
11396 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11397 EVT SetCCVT =
11398 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11399 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11400 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
11401 }
11402
// Fallback path: compare-and-select around an unclamped conversion.
11403 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11404 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11405
11406 // Result of direct conversion. The assumption here is that the operation is
11407 // non-trapping and it's fine to apply it to an out-of-range value if we
11408 // select it away later.
11409 SDValue FpToInt =
11410 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11411
11412 SDValue Select = FpToInt;
11413
11414 EVT SetCCVT =
11415 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11416
11417 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11418 // MinInt if Src is NaN.
11419 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11420 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11421 // If Src OGT MaxFloat, select MaxInt.
11422 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11423 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11424
11425 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11426 // is already zero.
11427 if (!IsSigned)
11428 return Select;
11429
11430 // Otherwise, select 0 if Src is NaN.
11431 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11432 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11433 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11434}
11435
// Round Op down to ResultVT using round-to-odd, so a subsequent narrowing
// (e.g. f32 -> bf16) cannot suffer a double-rounding error. Returns Op
// unchanged if the scalar types already match.
// NOTE(review): the first signature line (line 11436, presumably
// "SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue
// Op,") is missing from this extraction — confirm against the original.
11437 const SDLoc &dl,
11438 SelectionDAG &DAG) const {
11439 EVT OperandVT = Op.getValueType();
11440 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11441 return Op;
11442 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11443 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11444 // can induce double-rounding which may alter the results. We can
11445 // correct for this using a trick explained in: Boldo, Sylvie, and
11446 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11447 // World Congress. 2005.
// Work on the absolute value and re-attach the sign bit at the end.
11448 unsigned BitSize = OperandVT.getScalarSizeInBits();
11449 EVT WideIntVT = OperandVT.changeTypeToInteger();
11450 SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
11451 SDValue SignBit =
11452 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
11453 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
11454 SDValue AbsWide;
11455 if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
11456 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11457 } else {
// No legal FABS: clear the sign bit via integer masking instead.
11458 SDValue ClearedSign = DAG.getNode(
11459 ISD::AND, dl, WideIntVT, OpAsInt,
11460 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
11461 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
11462 }
// Round to the narrow type and back so we can compare against the
// original wide value to classify the rounding that happened.
11463 SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
11464 SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
11465
11466 // We can keep the narrow value as-is if narrowing was exact (no
11467 // rounding error), the wide value was NaN (the narrow value is also
11468 // NaN and should be preserved) or if we rounded to the odd value.
11469 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
11470 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11471 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11472 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11473 EVT ResultIntVTCCVT = getSetCCResultType(
11474 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11475 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11476 // The result is already odd so we don't need to do anything.
11477 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11478
11479 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11480 AbsWide.getValueType());
11481 // We keep results which are exact, odd or NaN.
// SETUEQ is true for equal-or-unordered, covering both the exact and the
// NaN cases in a single compare.
11482 SDValue KeepNarrow =
11483 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
11484 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11485 // We morally performed a round-down if AbsNarrow is smaller than
11486 // AbsWide.
11487 SDValue NarrowIsRd =
11488 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11489 // If the narrow value is odd or exact, pick it.
11490 // Otherwise, narrow is even and corresponds to either the rounded-up
11491 // or rounded-down value. If narrow is the rounded-down value, we want
11492 // the rounded-up value as it will be odd.
11493 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11494 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11495 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
// Re-attach the saved sign bit, shifted down into the narrow layout.
11496 int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
11497 SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
11498 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
11499 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
11500 Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
11501 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11502}
11503
// Expand an FP_ROUND whose destination scalar type is bf16 into integer
// arithmetic on the f32 bit pattern (round-to-odd to f32 first, then
// round-to-nearest-even to bf16 via a bias-add). Returns SDValue() for
// any other destination type, i.e. no expansion performed.
// NOTE(review): the signature line (line 11504, presumably
// "SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG)
// const {") is missing from this extraction — confirm against the original.
11505 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11506 SDValue Op = Node->getOperand(0);
11507 EVT VT = Node->getValueType(0);
11508 SDLoc dl(Node);
11509 if (VT.getScalarType() == MVT::bf16) {
// Operand 1 == 1 is the "trunc" flag: the input is known exact, so the
// simple FP_TO_BF16 node suffices.
11510 if (Node->getConstantOperandVal(1) == 1) {
11511 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11512 }
11513 EVT OperandVT = Op.getValueType();
// NaN test on the original operand (unordered self-compare), used later
// to bypass the rounding-bias add.
11514 SDValue IsNaN = DAG.getSetCC(
11515 dl,
11516 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11517 Op, Op, ISD::SETUO);
11518
11519 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11520 // can induce double-rounding which may alter the results. We can
11521 // correct for this using a trick explained in: Boldo, Sylvie, and
11522 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11523 // World Congress. 2005.
11524 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11525 EVT I32 = F32.changeTypeToInteger();
11526 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11527 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11528
11529 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11530 // turning into infinities.
11531 SDValue NaN =
11532 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11533
11534 // Factor in the contribution of the low 16 bits.
// Round-to-nearest-even: add 0x7fff plus the LSB of the kept portion.
11535 SDValue One = DAG.getConstant(1, dl, I32);
11536 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11537 DAG.getShiftAmountConstant(16, I32, dl));
11538 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11539 SDValue RoundingBias =
11540 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11541 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11542
11543 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11544 // 0x80000000.
11545 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11546
11547 // Now that we have rounded, shift the bits into position.
11548 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11549 DAG.getShiftAmountConstant(16, I32, dl));
11550 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11551 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11552 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11553 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11554 }
11555 return SDValue();
11556}
11557
// Expand a VECTOR_SPLICE of two scalable vectors by spilling V1:V2 to a
// stack slot and loading the result back from a computed offset (Imm >= 0
// selects from position Imm; Imm < 0 takes the last -Imm elements of V1
// followed by the front of V2).
// NOTE(review): the first signature line (line 11558, presumably
// "SDValue TargetLowering::expandVectorSplice(SDNode *Node,") is missing
// from this extraction — confirm against the original file.
11559 SelectionDAG &DAG) const {
11560 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11561 assert(Node->getValueType(0).isScalableVector() &&
11562 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11563
11564 EVT VT = Node->getValueType(0);
11565 SDValue V1 = Node->getOperand(0);
11566 SDValue V2 = Node->getOperand(1);
// Operand 2 is the (signed) splice offset in elements.
11567 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11568 SDLoc DL(Node);
11569
11570 // Expand through memory thusly:
11571 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11572 // Store V1, Ptr
11573 // Store V2, Ptr + sizeof(V1)
11574 // If (Imm < 0)
11575 // TrailingElts = -Imm
11576 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11577 // else
11578 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11579 // Res = Load Ptr
11580
11581 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11582
// NOTE(review): line 11583 (presumably the start of the MemVT declaration,
// an EVT::getVectorVT(...) over the doubled element count below) is missing
// from this extraction.
11584 VT.getVectorElementCount() * 2);
11585 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11586 EVT PtrVT = StackPtr.getValueType();
11587 auto &MF = DAG.getMachineFunction();
11588 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11589 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11590
11591 // Store the lo part of CONCAT_VECTORS(V1, V2)
11592 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11593 // Store the hi part of CONCAT_VECTORS(V1, V2)
// Scalable types: sizeof(V1) is vscale-dependent, hence getVScale.
11594 SDValue OffsetToV2 = DAG.getVScale(
11595 DL, PtrVT,
// NOTE(review): line 11596 (presumably the APInt byte-size expression for
// V1's minimum store size) is missing from this extraction.
11597 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11598 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11599
11600 if (Imm >= 0) {
11601 // Load back the required element. getVectorElementPointer takes care of
11602 // clamping the index if it's out-of-bounds.
11603 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11604 // Load the spliced result
11605 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
// NOTE(review): line 11606 (presumably the MachinePointerInfo argument of
// this getLoad) is missing from this extraction.
11607 }
11608
11609 uint64_t TrailingElts = -Imm;
11610
11611 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11612 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11613 SDValue TrailingBytes =
11614 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11615
// If more trailing elements were requested than a minimum-vscale V1
// holds, clamp the byte offset to the runtime vector length.
11616 if (TrailingElts > VT.getVectorMinNumElements()) {
11617 SDValue VLBytes =
11618 DAG.getVScale(DL, PtrVT,
11619 APInt(PtrVT.getFixedSizeInBits(),
// NOTE(review): line 11620 (presumably the known-min byte size completing
// this APInt) is missing from this extraction.
11621 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11622 }
11623
11624 // Calculate the start address of the spliced result.
11625 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11626
11627 // Load the spliced result
11628 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
// NOTE(review): line 11629 (presumably the MachinePointerInfo argument of
// this getLoad) is missing from this extraction.
11630}
11631
// Expand a VECTOR_COMPRESS node via the stack: optionally pre-store the
// passthru vector, then store each selected element of Vec at the next
// output slot, and finally reload the whole vector. Also fixes up the last
// written slot so unselected tail lanes keep the passthru value.
// NOTE(review): the first signature line (line 11632, presumably
// "SDValue TargetLowering::expandVECTOR_COMPRESS(SDNode *Node,") is
// missing from this extraction — confirm against the original file.
11633 SelectionDAG &DAG) const {
11634 SDLoc DL(Node);
11635 SDValue Vec = Node->getOperand(0);
11636 SDValue Mask = Node->getOperand(1);
11637 SDValue Passthru = Node->getOperand(2);
11638
11639 EVT VecVT = Vec.getValueType();
11640 EVT ScalarVT = VecVT.getScalarType();
11641 EVT MaskVT = Mask.getValueType();
11642 EVT MaskScalarVT = MaskVT.getScalarType();
11643
11644 // Needs to be handled by targets that have scalable vector types.
11645 if (VecVT.isScalableVector())
11646 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
11647
11648 SDValue StackPtr = DAG.CreateStackTemporary(
11649 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
11650 int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11651 MachinePointerInfo PtrInfo =
// NOTE(review): line 11652 (presumably the
// MachinePointerInfo::getFixedStack(...) initializer) is missing from this
// extraction.
11653
11654 MVT PositionVT = getVectorIdxTy(DAG.getDataLayout());
11655 SDValue Chain = DAG.getEntryNode();
// OutPos tracks the next write index within the stack slot.
11656 SDValue OutPos = DAG.getConstant(0, DL, PositionVT);
11657
11658 bool HasPassthru = !Passthru.isUndef();
11659
11660 // If we have a passthru vector, store it on the stack, overwrite the matching
11661 // positions and then re-write the last element that was potentially
11662 // overwritten even though mask[i] = false.
11663 if (HasPassthru)
11664 Chain = DAG.getStore(Chain, DL, Passthru, StackPtr, PtrInfo);
11665
11666 SDValue LastWriteVal;
11667 APInt PassthruSplatVal;
11668 bool IsSplatPassthru =
11669 ISD::isConstantSplatVector(Passthru.getNode(), PassthruSplatVal);
11670
11671 if (IsSplatPassthru) {
11672 // As we do not know which position we wrote to last, we cannot simply
11673 // access that index from the passthru vector. So we first check if passthru
11674 // is a splat vector, to use any element ...
11675 LastWriteVal = DAG.getConstant(PassthruSplatVal, DL, ScalarVT);
11676 } else if (HasPassthru) {
11677 // ... if it is not a splat vector, we need to get the passthru value at
11678 // position = popcount(mask) and re-load it from the stack before it is
11679 // overwritten in the loop below.
11680 SDValue Popcount = DAG.getNode(
11681 ISD::TRUNCATE, DL, MaskVT.changeVectorElementType(MVT::i1), Mask);
11682 Popcount = DAG.getNode(ISD::ZERO_EXTEND, DL,
11683 MaskVT.changeVectorElementType(ScalarVT), Popcount);
11684 Popcount = DAG.getNode(ISD::VECREDUCE_ADD, DL, ScalarVT, Popcount);
11685 SDValue LastElmtPtr =
11686 getVectorElementPointer(DAG, StackPtr, VecVT, Popcount);
11687 LastWriteVal = DAG.getLoad(
11688 ScalarVT, DL, Chain, LastElmtPtr,
// NOTE(review): line 11689 (presumably the MachinePointerInfo argument of
// this getLoad) is missing from this extraction.
11690 Chain = LastWriteVal.getValue(1);
11691 }
11692
// Main compress loop: unconditionally store element I at OutPos, then
// advance OutPos only when the mask lane is set.
11693 unsigned NumElms = VecVT.getVectorNumElements();
11694 for (unsigned I = 0; I < NumElms; I++) {
// NOTE(review): line 11695 (presumably the per-iteration Idx constant,
// e.g. DAG.getVectorIdxConstant(I, DL)) is missing from this extraction.
11696
11697 SDValue ValI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Vec, Idx);
11698 SDValue OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11699 Chain = DAG.getStore(
11700 Chain, DL, ValI, OutPtr,
// NOTE(review): line 11701 (presumably the MachinePointerInfo argument of
// this getStore) is missing from this extraction.
11702
11703 // Get the mask value and add it to the current output position. This
11704 // either increments by 1 if MaskI is true or adds 0 otherwise.
11705 // Freeze in case we have poison/undef mask entries.
11706 SDValue MaskI = DAG.getFreeze(
11707 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskScalarVT, Mask, Idx));
11708 MaskI = DAG.getFreeze(MaskI);
11709 MaskI = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, MaskI);
11710 MaskI = DAG.getNode(ISD::ZERO_EXTEND, DL, PositionVT, MaskI);
11711 OutPos = DAG.getNode(ISD::ADD, DL, PositionVT, OutPos, MaskI);
11712
// After the final element, repair the one slot that the unconditional
// store may have clobbered past the compressed region.
11713 if (HasPassthru && I == NumElms - 1) {
11714 SDValue EndOfVector =
11715 DAG.getConstant(VecVT.getVectorNumElements() - 1, DL, PositionVT);
11716 SDValue AllLanesSelected =
11717 DAG.getSetCC(DL, MVT::i1, OutPos, EndOfVector, ISD::CondCode::SETUGT);
11718 OutPos = DAG.getNode(ISD::UMIN, DL, PositionVT, OutPos, EndOfVector);
11719 OutPtr = getVectorElementPointer(DAG, StackPtr, VecVT, OutPos);
11720
11721 // Re-write the last ValI if all lanes were selected. Otherwise,
11722 // overwrite the last write it with the passthru value.
11723 LastWriteVal =
11724 DAG.getSelect(DL, ScalarVT, AllLanesSelected, ValI, LastWriteVal);
11725 Chain = DAG.getStore(
11726 Chain, DL, LastWriteVal, OutPtr,
// NOTE(review): line 11727 (presumably the MachinePointerInfo argument of
// this getStore) is missing from this extraction.
11728 }
11729 }
11730
11731 return DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
11732}
11733
// Legalize the condition code of a SETCC-like operation (including the VP
// variants when Mask/EVL are set). May swap LHS/RHS, invert the condition
// (reported via NeedInvert), or split an illegal FP predicate into two
// legal compares joined by AND/OR. Returns true if any rewrite happened;
// false means the condition was already legal.
// NOTE(review): the first signature line(s) (presumably
// "bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,")
// are missing from this extraction — confirm against the original file.
11735 SDValue &LHS, SDValue &RHS,
11736 SDValue &CC, SDValue Mask,
11737 SDValue EVL, bool &NeedInvert,
11738 const SDLoc &dl, SDValue &Chain,
11739 bool IsSignaling) const {
11740 MVT OpVT = LHS.getSimpleValueType();
11741 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11742 NeedInvert = false;
// Mask and EVL come as a pair; their absence selects the non-VP path.
11743 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11744 bool IsNonVP = !EVL;
11745 switch (getCondCodeAction(CCCode, OpVT)) {
11746 default:
11747 llvm_unreachable("Unknown condition code action!");
// NOTE(review): line 11748 (presumably "case TargetLowering::Legal:") is
// missing from this extraction.
11749 // Nothing to do.
11750 break;
// NOTE(review): lines 11751-11752 (presumably "case TargetLowering::Expand:
// {" and the swapped-operands InvCC initialization) are missing from this
// extraction.
11753 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
// Cheapest fix: the swapped condition is legal, so just swap operands.
11754 std::swap(LHS, RHS);
11755 CC = DAG.getCondCode(InvCC);
11756 return true;
11757 }
11758 // Swapping operands didn't work. Try inverting the condition.
11759 bool NeedSwap = false;
11760 InvCC = getSetCCInverse(CCCode, OpVT);
11761 if (!isCondCodeLegalOrCustom(InvCC, OpVT)) {
11762 // If inverting the condition is not enough, try swapping operands
11763 // on top of it.
11764 InvCC = ISD::getSetCCSwappedOperands(InvCC);
11765 NeedSwap = true;
11766 }
11767 if (isCondCodeLegalOrCustom(InvCC, OpVT)) {
11768 CC = DAG.getCondCode(InvCC);
11769 NeedInvert = true;
11770 if (NeedSwap)
11771 std::swap(LHS, RHS);
11772 return true;
11773 }
11774
// Neither swap nor invert produced a legal code: split into two compares.
// NOTE(review): line 11775 (presumably the CC1/CC2 declarations, e.g.
// "ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;") is
// missing from this extraction.
11776 unsigned Opc = 0;
11777 switch (CCCode) {
11778 default:
11779 llvm_unreachable("Don't know how to expand this condition!");
11780 case ISD::SETUO:
11781 if (isCondCodeLegal(ISD::SETUNE, OpVT)) {
// unord(x,y) == une(x,x) || une(y,y) — NaN compares unequal to itself.
11782 CC1 = ISD::SETUNE;
11783 CC2 = ISD::SETUNE;
11784 Opc = ISD::OR;
11785 break;
11786 }
// NOTE(review): line 11787 (presumably the assert( opening whose message
// follows) is missing from this extraction.
11788 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11789 NeedInvert = true;
11790 [[fallthrough]];
11791 case ISD::SETO:
// NOTE(review): line 11792 (presumably the assert( opening whose message
// follows) is missing from this extraction.
11793 "If SETO is expanded, SETOEQ must be legal!");
// ord(x,y) == oeq(x,x) && oeq(y,y).
11794 CC1 = ISD::SETOEQ;
11795 CC2 = ISD::SETOEQ;
11796 Opc = ISD::AND;
11797 break;
11798 case ISD::SETONE:
11799 case ISD::SETUEQ:
11800 // If the SETUO or SETO CC isn't legal, we might be able to use
11801 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11802 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11803 // the operands.
11804 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11805 if (!isCondCodeLegal(CC2, OpVT) && (isCondCodeLegal(ISD::SETOGT, OpVT) ||
11806 isCondCodeLegal(ISD::SETOLT, OpVT))) {
11807 CC1 = ISD::SETOGT;
11808 CC2 = ISD::SETOLT;
11809 Opc = ISD::OR;
11810 NeedInvert = ((unsigned)CCCode & 0x8U);
11811 break;
11812 }
11813 [[fallthrough]];
11814 case ISD::SETOEQ:
11815 case ISD::SETOGT:
11816 case ISD::SETOGE:
11817 case ISD::SETOLT:
11818 case ISD::SETOLE:
11819 case ISD::SETUNE:
11820 case ISD::SETUGT:
11821 case ISD::SETUGE:
11822 case ISD::SETULT:
11823 case ISD::SETULE:
11824 // If we are floating point, assign and break, otherwise fall through.
11825 if (!OpVT.isInteger()) {
11826 // We can use the 4th bit to tell if we are the unordered
11827 // or ordered version of the opcode.
11828 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11829 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
// Strip the ordering bit and set the "ordered compare" bit to get the
// matching ordered predicate for CC1.
11830 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11831 break;
11832 }
11833 // Fallthrough if we are unsigned integer.
11834 [[fallthrough]];
11835 case ISD::SETLE:
11836 case ISD::SETGT:
11837 case ISD::SETGE:
11838 case ISD::SETLT:
11839 case ISD::SETNE:
11840 case ISD::SETEQ:
11841 // If all combinations of inverting the condition and swapping operands
11842 // didn't work then we have no means to expand the condition.
11843 llvm_unreachable("Don't know how to expand this condition!");
11844 }
11845
// Build the two compares and join them with Opc (AND/OR), using the VP
// variants when Mask/EVL are present.
11846 SDValue SetCC1, SetCC2;
11847 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11848 // If we aren't the ordered or unorder operation,
11849 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11850 if (IsNonVP) {
11851 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
11852 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
11853 } else {
11854 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
11855 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
11856 }
11857 } else {
11858 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11859 if (IsNonVP) {
11860 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
11861 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
11862 } else {
11863 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
11864 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
11865 }
11866 }
// Merge the chains of the two (possibly chained/strict) compares.
11867 if (Chain)
11868 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
11869 SetCC2.getValue(1));
11870 if (IsNonVP)
11871 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
11872 else {
11873 // Transform the binary opcode to the VP equivalent.
11874 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
11875 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11876 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
11877 }
// The combined result lives in LHS; RHS/CC are cleared so the caller
// emits LHS directly rather than a new SETCC.
11878 RHS = SDValue();
11879 CC = SDValue();
11880 return true;
11881 }
11882 }
11883 return false;
11884}
unsigned const MachineRegisterInfo * MRI
static const LLT F32
amdgpu AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:512
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
Function const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue BuildExactUDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact UDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, TargetLowering::TargetLoweringOpt &TLO, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to StoreVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1249
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1070
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1050
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:1010
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:1021
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1543
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:401
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1370
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1470
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1318
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1446
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:194
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
APInt reverseBits() const
Definition: APInt.cpp:737
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
void negate()
Negate this APInt in place.
Definition: APInt.h:1428
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1596
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1555
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1489
unsigned countLeadingZeros() const
Definition: APInt.h:1563
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
unsigned logBase2() const
Definition: APInt.h:1717
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1297
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:383
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1345
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:851
APInt byteSwap() const
Definition: APInt.cpp:715
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
void clearHighBits(unsigned hiBits)
Set top hiBits bits to 0.
Definition: APInt.h:1402
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1613
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1321
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1092
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:706
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:195
bool isBigEndian() const
Definition: DataLayout.h:196
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:842
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:357
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:102
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:266
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:459
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDNodeIterator end(const SDNode *N)
static SDNodeIterator begin(const SDNode *N)
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:968
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:493
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:451
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:487
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:876
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:488
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:782
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
std::optional< uint64_t > getValidMaximumShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has shift amounts that are all less than the element bit-width of the shift n...
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
std::optional< uint64_t > getValidShiftAmount(SDValue V, const APInt &DemandedElts, unsigned Depth=0) const
If a SHL/SRA/SRL node V has a uniform shift amount that is less than the element bit-width of the shi...
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:500
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:570
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:892
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:556
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
iterator end() const
Definition: StringRef.h:113
Class to represent struct types.
Definition: DerivedTypes.h:216
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool shouldExpandCmpUsingSelects() const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, bool IsAfterLegalTypes, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, ...
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, EVT WideVT, const SDValue LL, const SDValue LH, const SDValue RL, const SDValue RH, SDValue &Lo, SDValue &Hi) const
forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or brute force via a wide mul...
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:724
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:283
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:298
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition: Value.cpp:698
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth.
Definition: APInt.cpp:2978
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:511
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:573
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:374
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:501
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:380
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:870
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:387
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1480
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:684
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:980
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1120
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:514
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:356
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1434
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:587
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1041
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:438
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:439
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1028
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:366
@ SMULO
Same as [SU]ADDO (RESULT, BOOL = [SU]MULO(LHS, RHS)), but overflow-aware multiplication.
Definition: ISDOpcodes.h:338
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:859
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:696
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:393
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:938
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1047
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:886
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:164
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:679
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition: ISDOpcodes.h:223
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:626
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:919
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:881
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:905
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:347
@ AssertZext
Definition: ISDOpcodes.h:62
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
Definition: ISDOpcodes.h:1052
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition: ISDOpcodes.h:691
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1661
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1666
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1636
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:555
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1754
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1613
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1452
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:382
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:254
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:258
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:270
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:381
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:121
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:275
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:291
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:341
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:234
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:350
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:416
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:456
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:398
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
bool isFixedLengthVector() const
Definition: ValueTypes.h:178
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:314
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
Definition: ValueTypes.cpp:306
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:299
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:439
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:126
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:136
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:154
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:161
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:150
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:140
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:290
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:175
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:244
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:202
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:97
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:62
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:150
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:536
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:278
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:222
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:178
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:310
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:215
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:502
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:542
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:51
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:518
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:522
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:94
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:797
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition: KnownBits.h:156
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:546
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:526
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:275
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:512
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:196
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setSExt(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...