Line data Source code
1 : //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This implements the TargetLowering class.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #include "llvm/CodeGen/TargetLowering.h"
15 : #include "llvm/ADT/BitVector.h"
16 : #include "llvm/ADT/STLExtras.h"
17 : #include "llvm/CodeGen/CallingConvLower.h"
18 : #include "llvm/CodeGen/MachineFrameInfo.h"
19 : #include "llvm/CodeGen/MachineFunction.h"
20 : #include "llvm/CodeGen/MachineJumpTableInfo.h"
21 : #include "llvm/CodeGen/MachineRegisterInfo.h"
22 : #include "llvm/CodeGen/SelectionDAG.h"
23 : #include "llvm/CodeGen/TargetRegisterInfo.h"
24 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 : #include "llvm/IR/DataLayout.h"
26 : #include "llvm/IR/DerivedTypes.h"
27 : #include "llvm/IR/GlobalVariable.h"
28 : #include "llvm/IR/LLVMContext.h"
29 : #include "llvm/MC/MCAsmInfo.h"
30 : #include "llvm/MC/MCExpr.h"
31 : #include "llvm/Support/ErrorHandling.h"
32 : #include "llvm/Support/KnownBits.h"
33 : #include "llvm/Support/MathExtras.h"
34 : #include "llvm/Target/TargetLoweringObjectFile.h"
35 : #include "llvm/Target/TargetMachine.h"
36 : #include <cctype>
37 : using namespace llvm;
38 :
39 : /// NOTE: The TargetMachine owns TLOF.
40 41320 : TargetLowering::TargetLowering(const TargetMachine &tm)
41 41320 : : TargetLoweringBase(tm) {}
42 :
43 0 : const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
44 0 : return nullptr;
45 : }
46 :
47 2471464 : bool TargetLowering::isPositionIndependent() const {
48 2471464 : return getTargetMachine().isPositionIndependent();
49 : }
50 :
51 : /// Check whether a given call node is in tail position within its function. If
52 : /// so, it sets Chain to the input chain of the tail call.
53 5092 : bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54 : SDValue &Chain) const {
55 5092 : const Function &F = DAG.getMachineFunction().getFunction();
56 :
57 : // Conservatively require the attributes of the call to match those of
58 : // the return. Ignore NoAlias and NonNull because they don't affect the
59 : // call sequence.
60 5092 : AttributeList CallerAttrs = F.getAttributes();
61 10184 : if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
62 5092 : .removeAttribute(Attribute::NoAlias)
63 5092 : .removeAttribute(Attribute::NonNull)
64 5092 : .hasAttributes())
65 : return false;
66 :
67 : // It's not safe to eliminate the sign / zero extension of the return value.
68 10078 : if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
69 5039 : CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
70 0 : return false;
71 :
72 : // Check if the only use is a function return node.
73 5039 : return isUsedByReturnOnly(Node, Chain);
74 : }
75 :
76 4307 : bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
77 : const uint32_t *CallerPreservedMask,
78 : const SmallVectorImpl<CCValAssign> &ArgLocs,
79 : const SmallVectorImpl<SDValue> &OutVals) const {
80 11341 : for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
81 7044 : const CCValAssign &ArgLoc = ArgLocs[I];
82 7044 : if (!ArgLoc.isRegLoc())
83 : continue;
84 6812 : unsigned Reg = ArgLoc.getLocReg();
85 : // Only look at callee saved registers.
86 6812 : if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
87 : continue;
88 : // Check that we pass the value used for the caller.
89 : // (We look for a CopyFromReg reading a virtual register that is used
90 : // for the function live-in value of register Reg)
91 16 : SDValue Value = OutVals[I];
92 16 : if (Value->getOpcode() != ISD::CopyFromReg)
93 : return false;
94 16 : unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
95 16 : if (MRI.getLiveInPhysReg(ArgReg) != Reg)
96 : return false;
97 : }
98 : return true;
99 : }
100 :
101 : /// Set CallLoweringInfo attribute flags based on a call instruction
102 : /// and called function attributes.
103 4107668 : void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
104 : unsigned ArgIdx) {
105 4107668 : IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt);
106 4107668 : IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt);
107 4107668 : IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg);
108 4107668 : IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet);
109 4107668 : IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest);
110 4107668 : IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal);
111 4107668 : IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca);
112 4107668 : IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
113 4107668 : IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
114 4107668 : IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
115 4107668 : Alignment = CS->getParamAlignment(ArgIdx);
116 4107668 : }
117 :
118 : /// Generate a libcall taking the given operands as arguments and returning a
119 : /// result of type RetVT.
120 : std::pair<SDValue, SDValue>
121 3140 : TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
122 : ArrayRef<SDValue> Ops, bool isSigned,
123 : const SDLoc &dl, bool doesNotReturn,
124 : bool isReturnValueUsed) const {
125 : TargetLowering::ArgListTy Args;
126 3140 : Args.reserve(Ops.size());
127 :
128 : TargetLowering::ArgListEntry Entry;
129 7498 : for (SDValue Op : Ops) {
130 4358 : Entry.Node = Op;
131 4358 : Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
132 8716 : Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
133 8716 : Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
134 4358 : Args.push_back(Entry);
135 : }
136 :
137 3140 : if (LC == RTLIB::UNKNOWN_LIBCALL)
138 0 : report_fatal_error("Unsupported library call operation!");
139 : SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
140 6280 : getPointerTy(DAG.getDataLayout()));
141 :
142 3140 : Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
143 6279 : TargetLowering::CallLoweringInfo CLI(DAG);
144 3140 : bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
145 : CLI.setDebugLoc(dl)
146 3140 : .setChain(DAG.getEntryNode())
147 3140 : .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
148 : .setNoReturn(doesNotReturn)
149 : .setDiscardResult(!isReturnValueUsed)
150 : .setSExtResult(signExtend)
151 3140 : .setZExtResult(!signExtend);
152 3140 : return LowerCallTo(CLI);
153 : }
154 :
155 : /// Soften the operands of a comparison. This code is shared among BR_CC,
156 : /// SELECT_CC, and SETCC handlers.
157 331 : void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
158 : SDValue &NewLHS, SDValue &NewRHS,
159 : ISD::CondCode &CCCode,
160 : const SDLoc &dl) const {
161 : assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
162 : && "Unsupported setcc type!");
163 :
164 : // Expand into one or more soft-fp libcall(s).
165 : RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
166 : bool ShouldInvertCC = false;
167 331 : switch (CCCode) {
168 : case ISD::SETEQ:
169 : case ISD::SETOEQ:
170 : LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
171 : (VT == MVT::f64) ? RTLIB::OEQ_F64 :
172 : (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
173 64 : break;
174 : case ISD::SETNE:
175 : case ISD::SETUNE:
176 : LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
177 : (VT == MVT::f64) ? RTLIB::UNE_F64 :
178 : (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
179 45 : break;
180 : case ISD::SETGE:
181 : case ISD::SETOGE:
182 : LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
183 : (VT == MVT::f64) ? RTLIB::OGE_F64 :
184 : (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
185 30 : break;
186 : case ISD::SETLT:
187 : case ISD::SETOLT:
188 : LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
189 : (VT == MVT::f64) ? RTLIB::OLT_F64 :
190 : (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
191 46 : break;
192 : case ISD::SETLE:
193 : case ISD::SETOLE:
194 : LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
195 : (VT == MVT::f64) ? RTLIB::OLE_F64 :
196 : (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
197 30 : break;
198 : case ISD::SETGT:
199 : case ISD::SETOGT:
200 : LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
201 : (VT == MVT::f64) ? RTLIB::OGT_F64 :
202 : (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
203 40 : break;
204 : case ISD::SETUO:
205 : LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
206 : (VT == MVT::f64) ? RTLIB::UO_F64 :
207 : (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
208 16 : break;
209 : case ISD::SETO:
210 : LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
211 : (VT == MVT::f64) ? RTLIB::O_F64 :
212 : (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
213 6 : break;
214 : case ISD::SETONE:
215 : // SETONE = SETOLT | SETOGT
216 : LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
217 : (VT == MVT::f64) ? RTLIB::OLT_F64 :
218 : (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
219 : LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
220 : (VT == MVT::f64) ? RTLIB::OGT_F64 :
221 : (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
222 5 : break;
223 : case ISD::SETUEQ:
224 : LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
225 : (VT == MVT::f64) ? RTLIB::UO_F64 :
226 : (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
227 : LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
228 : (VT == MVT::f64) ? RTLIB::OEQ_F64 :
229 : (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
230 11 : break;
231 38 : default:
232 : // Invert CC for unordered comparisons
233 : ShouldInvertCC = true;
234 : switch (CCCode) {
235 : case ISD::SETULT:
236 : LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
237 : (VT == MVT::f64) ? RTLIB::OGE_F64 :
238 : (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
239 7 : break;
240 : case ISD::SETULE:
241 : LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
242 : (VT == MVT::f64) ? RTLIB::OGT_F64 :
243 : (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
244 14 : break;
245 : case ISD::SETUGT:
246 : LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
247 : (VT == MVT::f64) ? RTLIB::OLE_F64 :
248 : (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
249 8 : break;
250 : case ISD::SETUGE:
251 : LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
252 : (VT == MVT::f64) ? RTLIB::OLT_F64 :
253 : (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
254 9 : break;
255 0 : default: llvm_unreachable("Do not know how to soften this setcc!");
256 : }
257 : }
258 :
259 : // Use the target specific return value for comparions lib calls.
260 331 : EVT RetVT = getCmpLibcallReturnType();
261 331 : SDValue Ops[2] = {NewLHS, NewRHS};
262 331 : NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
263 331 : dl).first;
264 331 : NewRHS = DAG.getConstant(0, dl, RetVT);
265 :
266 331 : CCCode = getCmpLibcallCC(LC1);
267 331 : if (ShouldInvertCC)
268 38 : CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
269 :
270 331 : if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
271 : SDValue Tmp = DAG.getNode(
272 : ISD::SETCC, dl,
273 16 : getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
274 16 : NewLHS, NewRHS, DAG.getCondCode(CCCode));
275 16 : NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
276 16 : dl).first;
277 16 : NewLHS = DAG.getNode(
278 : ISD::SETCC, dl,
279 16 : getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
280 16 : NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
281 32 : NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
282 16 : NewRHS = SDValue();
283 : }
284 331 : }
285 :
286 : /// Return the entry encoding for a jump table in the current function. The
287 : /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
288 3145 : unsigned TargetLowering::getJumpTableEncoding() const {
289 : // In non-pic modes, just use the address of a block.
290 3145 : if (!isPositionIndependent())
291 : return MachineJumpTableInfo::EK_BlockAddress;
292 :
293 : // In PIC mode, if the target supports a GPRel32 directive, use it.
294 256 : if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
295 6 : return MachineJumpTableInfo::EK_GPRel32BlockAddress;
296 :
297 : // Otherwise, use a label difference.
298 : return MachineJumpTableInfo::EK_LabelDifference32;
299 : }
300 :
301 15 : SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
302 : SelectionDAG &DAG) const {
303 : // If our PIC model is GP relative, use the global offset table as the base.
304 15 : unsigned JTEncoding = getJumpTableEncoding();
305 :
306 15 : if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
307 : (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
308 16 : return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
309 :
310 7 : return Table;
311 : }
312 :
313 : /// This returns the relocation base for the given PIC jumptable, the same as
314 : /// getPICJumpTableRelocBase, but as an MCExpr.
315 : const MCExpr *
316 2114 : TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
317 : unsigned JTI,MCContext &Ctx) const{
318 : // The normal PIC reloc base is the label at the start of the jump table.
319 2114 : return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
320 : }
321 :
322 : bool
323 939431 : TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
324 939431 : const TargetMachine &TM = getTargetMachine();
325 939431 : const GlobalValue *GV = GA->getGlobal();
326 :
327 : // If the address is not even local to this DSO we will have to load it from
328 : // a got and then add the offset.
329 939431 : if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
330 : return false;
331 :
332 : // If the code is position independent we will have to add a base register.
333 933696 : if (isPositionIndependent())
334 907691 : return false;
335 :
336 : // Otherwise we can do it.
337 : return true;
338 : }
339 :
340 : //===----------------------------------------------------------------------===//
341 : // Optimization Methods
342 : //===----------------------------------------------------------------------===//
343 :
344 : /// If the specified instruction has a constant integer operand and there are
345 : /// bits set in that constant that are not demanded, then clear those bits and
346 : /// return true.
347 882711 : bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
348 : TargetLoweringOpt &TLO) const {
349 882711 : SelectionDAG &DAG = TLO.DAG;
350 : SDLoc DL(Op);
351 : unsigned Opcode = Op.getOpcode();
352 :
353 : // Do target-specific constant optimization.
354 882711 : if (targetShrinkDemandedConstant(Op, Demanded, TLO))
355 14750 : return TLO.New.getNode();
356 :
357 : // FIXME: ISD::SELECT, ISD::SELECT_CC
358 867961 : switch (Opcode) {
359 : default:
360 : break;
361 857903 : case ISD::XOR:
362 : case ISD::AND:
363 : case ISD::OR: {
364 : auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
365 : if (!Op1C)
366 : return false;
367 :
368 : // If this is a 'not' op, don't touch it because that's a canonical form.
369 636050 : const APInt &C = Op1C->getAPIntValue();
370 644640 : if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
371 : return false;
372 :
373 636050 : if (!C.isSubsetOf(Demanded)) {
374 10981 : EVT VT = Op.getValueType();
375 21962 : SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
376 10981 : SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
377 10981 : return TLO.CombineTo(Op, NewOp);
378 : }
379 :
380 : break;
381 : }
382 : }
383 :
384 : return false;
385 : }
386 :
387 : /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
388 : /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
389 : /// generalized for targets with other types of implicit widening casts.
390 3970338 : bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
391 : const APInt &Demanded,
392 : TargetLoweringOpt &TLO) const {
393 : assert(Op.getNumOperands() == 2 &&
394 : "ShrinkDemandedOp only supports binary operators!");
395 : assert(Op.getNode()->getNumValues() == 1 &&
396 : "ShrinkDemandedOp only supports nodes with one result!");
397 :
398 3970338 : SelectionDAG &DAG = TLO.DAG;
399 : SDLoc dl(Op);
400 :
401 : // Early return, as this function cannot handle vector types.
402 7940676 : if (Op.getValueType().isVector())
403 : return false;
404 :
405 : // Don't do this if the node has another user, which may require the
406 : // full value.
407 : if (!Op.getNode()->hasOneUse())
408 : return false;
409 :
410 : // Search for the smallest integer type with free casts to and from
411 : // Op's type. For expedience, just check power-of-2 integer types.
412 2790383 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
413 : unsigned DemandedSize = Demanded.getActiveBits();
414 : unsigned SmallVTBits = DemandedSize;
415 : if (!isPowerOf2_32(SmallVTBits))
416 97472 : SmallVTBits = NextPowerOf2(SmallVTBits);
417 3156145 : for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
418 187223 : EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
419 520332 : if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
420 291772 : TLI.isZExtFree(SmallVT, Op.getValueType())) {
421 : // We found a type with free casts.
422 : SDValue X = DAG.getNode(
423 : Op.getOpcode(), dl, SmallVT,
424 : DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
425 4342 : DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
426 : assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
427 4342 : SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
428 4342 : return TLO.CombineTo(Op, Z);
429 : }
430 : }
431 : return false;
432 : }
433 :
434 : bool
435 12412 : TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
436 : const APInt &DemandedBits,
437 : DAGCombinerInfo &DCI,
438 : TargetLoweringOpt &TLO) const {
439 24824 : SDValue Op = User->getOperand(OpIdx);
440 12412 : KnownBits Known;
441 :
442 12412 : if (!SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, true))
443 : return false;
444 :
445 :
446 : // Old will not always be the same as Op. For example:
447 : //
448 : // Demanded = 0xffffff
449 : // Op = i64 truncate (i32 and x, 0xffffff)
450 : // In this case simplify demand bits will want to replace the 'and' node
451 : // with the value 'x', which will give us:
452 : // Old = i32 and x, 0xffffff
453 : // New = x
454 826 : if (TLO.Old.hasOneUse()) {
455 : // For the one use case, we just commit the change.
456 805 : DCI.CommitTargetLoweringOpt(TLO);
457 805 : return true;
458 : }
459 :
460 : // If Old has more than one use then it must be Op, because the
461 : // AssumeSingleUse flag is not propogated to recursive calls of
462 : // SimplifyDemanded bits, so the only node with multiple use that
463 : // it will attempt to combine will be Op.
464 : assert(TLO.Old == Op);
465 :
466 : SmallVector <SDValue, 4> NewOps;
467 63 : for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
468 42 : if (i == OpIdx) {
469 21 : NewOps.push_back(TLO.New);
470 21 : continue;
471 : }
472 42 : NewOps.push_back(User->getOperand(i));
473 : }
474 42 : User = TLO.DAG.UpdateNodeOperands(User, NewOps);
475 : // Op has less users now, so we may be able to perform additional combines
476 : // with it.
477 21 : DCI.AddToWorklist(Op.getNode());
478 : // User's operands have been updated, so we may be able to do new combines
479 : // with it.
480 21 : DCI.AddToWorklist(User);
481 : return true;
482 : }
483 :
484 14103 : bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
485 : DAGCombinerInfo &DCI) const {
486 14103 : SelectionDAG &DAG = DCI.DAG;
487 14103 : TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
488 28206 : !DCI.isBeforeLegalizeOps());
489 14103 : KnownBits Known;
490 :
491 14103 : bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
492 14103 : if (Simplified) {
493 1271 : DCI.AddToWorklist(Op.getNode());
494 1271 : DCI.CommitTargetLoweringOpt(TLO);
495 : }
496 14103 : return Simplified;
497 : }
498 :
499 : /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
500 : /// result of Op are ever used downstream. If we can use this information to
501 : /// simplify Op, create a new simplified DAG node and return true, returning the
502 : /// original and new nodes in Old and New. Otherwise, analyze the expression and
503 : /// return a mask of Known bits for the expression (used to simplify the
504 : /// caller). The Known bits may only be accurate for those bits in the
505 : /// DemandedMask.
506 14704887 : bool TargetLowering::SimplifyDemandedBits(SDValue Op,
507 : const APInt &OriginalDemandedBits,
508 : KnownBits &Known,
509 : TargetLoweringOpt &TLO,
510 : unsigned Depth,
511 : bool AssumeSingleUse) const {
512 14704887 : unsigned BitWidth = OriginalDemandedBits.getBitWidth();
513 : assert(Op.getScalarValueSizeInBits() == BitWidth &&
514 : "Mask size mismatches value type size!");
515 : APInt DemandedBits = OriginalDemandedBits;
516 : SDLoc dl(Op);
517 14704887 : auto &DL = TLO.DAG.getDataLayout();
518 :
519 : // Don't know anything.
520 14704887 : Known = KnownBits(BitWidth);
521 :
522 29409774 : if (Op.getOpcode() == ISD::Constant) {
523 : // We know all of the bits for a constant!
524 6298986 : Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
525 3149493 : Known.Zero = ~Known.One;
526 3149493 : return false;
527 : }
528 :
529 : // Other users may use these bits.
530 23110788 : EVT VT = Op.getValueType();
531 4601132 : if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
532 4599399 : if (Depth != 0) {
533 : // If not at the root, Just compute the Known bits to
534 : // simplify things downstream.
535 3619540 : TLO.DAG.computeKnownBits(Op, Known, Depth);
536 3619540 : return false;
537 : }
538 : // If this is the root being simplified, allow it to have multiple uses,
539 : // just set the DemandedBits to all bits.
540 1959718 : DemandedBits = APInt::getAllOnesValue(BitWidth);
541 6955995 : } else if (OriginalDemandedBits == 0) {
542 : // Not demanding any bits from Op.
543 2882 : if (!Op.isUndef())
544 1860 : return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
545 : return false;
546 6953113 : } else if (Depth == 6) { // Limit search depth.
547 : return false;
548 : }
549 :
550 7892629 : KnownBits Known2, KnownOut;
551 15785258 : switch (Op.getOpcode()) {
552 64619 : case ISD::BUILD_VECTOR:
553 : // Collect the known bits that are shared by every constant vector element.
554 64619 : Known.Zero.setAllBits(); Known.One.setAllBits();
555 274158 : for (SDValue SrcOp : Op->ops()) {
556 : if (!isa<ConstantSDNode>(SrcOp)) {
557 : // We can only handle all constant values - bail out with no known bits.
558 3441 : Known = KnownBits(BitWidth);
559 : return false;
560 : }
561 419078 : Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
562 209539 : Known2.Zero = ~Known2.One;
563 :
564 : // BUILD_VECTOR can implicitly truncate sources, we must handle this.
565 209539 : if (Known2.One.getBitWidth() != BitWidth) {
566 : assert(Known2.getBitWidth() > BitWidth &&
567 : "Expected BUILD_VECTOR implicit truncation");
568 12388 : Known2 = Known2.trunc(BitWidth);
569 : }
570 :
571 : // Known bits are the values that are shared by every element.
572 : // TODO: support per-element known bits.
573 : Known.One &= Known2.One;
574 : Known.Zero &= Known2.Zero;
575 : }
576 : return false; // Don't fall through, will infinitely loop.
577 2394 : case ISD::CONCAT_VECTORS:
578 2394 : Known.Zero.setAllBits();
579 2394 : Known.One.setAllBits();
580 8962 : for (SDValue SrcOp : Op->ops()) {
581 6570 : if (SimplifyDemandedBits(SrcOp, DemandedBits, Known2, TLO, Depth + 1))
582 : return true;
583 : // Known bits are the values that are shared by every subvector.
584 : Known.One &= Known2.One;
585 : Known.Zero &= Known2.Zero;
586 2 : }
587 2392 : break;
588 500580 : case ISD::AND: {
589 500580 : SDValue Op0 = Op.getOperand(0);
590 500580 : SDValue Op1 = Op.getOperand(1);
591 :
592 : // If the RHS is a constant, check to see if the LHS would be zero without
593 : // using the bits from the RHS. Below, we use knowledge about the RHS to
594 : // simplify the LHS, here we're using information from the LHS to simplify
595 : // the RHS.
596 500580 : if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
597 298332 : KnownBits LHSKnown;
598 : // Do not increment Depth here; that can cause an infinite loop.
599 408695 : TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
600 : // If the LHS already has zeros where RHSC does, this 'and' is dead.
601 408695 : if ((LHSKnown.Zero & DemandedBits) ==
602 817390 : (~RHSC->getAPIntValue() & DemandedBits))
603 110363 : return TLO.CombineTo(Op, Op0);
604 :
605 : // If any of the set bits in the RHS are known zero on the LHS, shrink
606 : // the constant.
607 618906 : if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
608 : return true;
609 :
610 : // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
611 : // constant, but if this 'and' is only clearing bits that were just set by
612 : // the xor, then this 'and' can be eliminated by shrinking the mask of
613 : // the xor. For example, for a 32-bit X:
614 : // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
615 300120 : if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
616 300991 : LHSKnown.One == ~RHSC->getAPIntValue()) {
617 32 : SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
618 16 : return TLO.CombineTo(Op, Xor);
619 : }
620 : }
621 :
622 390217 : if (SimplifyDemandedBits(Op1, DemandedBits, Known, TLO, Depth + 1))
623 : return true;
624 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
625 1170432 : if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, Known2, TLO,
626 : Depth + 1))
627 : return true;
628 : assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
629 :
630 : // If all of the demanded bits are known one on one side, return the other.
631 : // These bits cannot contribute to the result of the 'and'.
632 744256 : if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
633 280 : return TLO.CombineTo(Op, Op0);
634 371848 : if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
635 2851 : return TLO.CombineTo(Op, Op1);
636 : // If all of the demanded bits in the inputs are known zeros, return zero.
637 368997 : if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
638 4 : return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
639 : // If the RHS is a constant, see if we can simplify it.
640 737986 : if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
641 : return true;
642 : // If the operation can be done in a smaller type, do so.
643 368993 : if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
644 : return true;
645 :
646 : // Output known-1 bits are only known if set in both the LHS & RHS.
647 : Known.One &= Known2.One;
648 : // Output known-0 are known to be clear if zero in either the LHS | RHS.
649 : Known.Zero |= Known2.Zero;
650 366115 : break;
651 : }
652 192628 : case ISD::OR: {
653 192628 : SDValue Op0 = Op.getOperand(0);
654 192628 : SDValue Op1 = Op.getOperand(1);
655 :
656 192628 : if (SimplifyDemandedBits(Op1, DemandedBits, Known, TLO, Depth + 1))
657 10872 : return true;
658 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
659 562539 : if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, Known2, TLO, Depth + 1))
660 : return true;
661 : assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
662 :
663 : // If all of the demanded bits are known zero on one side, return the other.
664 : // These bits cannot contribute to the result of the 'or'.
665 366710 : if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
666 453 : return TLO.CombineTo(Op, Op0);
667 182902 : if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
668 1003 : return TLO.CombineTo(Op, Op1);
669 : // If the RHS is a constant, see if we can simplify it.
670 181899 : if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
671 : return true;
672 : // If the operation can be done in a smaller type, do so.
673 181861 : if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
674 : return true;
675 :
676 : // Output known-0 bits are only known if clear in both the LHS & RHS.
677 : Known.Zero &= Known2.Zero;
678 : // Output known-1 are known to be set if set in either the LHS | RHS.
679 : Known.One |= Known2.One;
680 181756 : break;
681 : }
682 156312 : case ISD::XOR: {
683 156312 : SDValue Op0 = Op.getOperand(0);
684 156312 : SDValue Op1 = Op.getOperand(1);
685 :
686 156312 : if (SimplifyDemandedBits(Op1, DemandedBits, Known, TLO, Depth + 1))
687 11724 : return true;
688 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
689 156214 : if (SimplifyDemandedBits(Op0, DemandedBits, Known2, TLO, Depth + 1))
690 : return true;
691 : assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
692 :
693 : // If all of the demanded bits are known zero on one side, return the other.
694 : // These bits cannot contribute to the result of the 'xor'.
695 312104 : if (DemandedBits.isSubsetOf(Known.Zero))
696 17 : return TLO.CombineTo(Op, Op0);
697 156035 : if (DemandedBits.isSubsetOf(Known2.Zero))
698 7 : return TLO.CombineTo(Op, Op1);
699 : // If the operation can be done in a smaller type, do so.
700 156028 : if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
701 : return true;
702 :
703 : // If all of the unknown bits are known to be zero on one side or the other
704 : // (but not both) turn this into an *inclusive* or.
705 : // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
706 155981 : if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
707 144 : return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
708 :
709 : // Output known-0 bits are known if clear or set in both the LHS & RHS.
710 311818 : KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
711 : // Output known-1 are known to be set if set in only one of the LHS, RHS.
712 155909 : KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
713 :
714 155909 : if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
715 : // If one side is a constant, and all of the known set bits on the other
716 : // side are also set in the constant, turn this into an AND, as we know
717 : // the bits will be cleared.
718 : // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
719 : // NB: it is okay if more bits are known than are requested
720 230532 : if (C->getAPIntValue() == Known2.One) {
721 : SDValue ANDC =
722 12 : TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
723 8 : return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
724 : }
725 :
726 : // If the RHS is a constant, see if we can change it. Don't alter a -1
727 : // constant because that's a 'not' op, and that is better for combining
728 : // and codegen.
729 115262 : if (!C->isAllOnesValue()) {
730 23477 : if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
731 : // We're flipping all demanded bits. Flip the undemanded bits too.
732 11275 : SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
733 11275 : return TLO.CombineTo(Op, New);
734 : }
735 : // If we can't turn this into a 'not', try to shrink the constant.
736 12202 : if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
737 : return true;
738 : }
739 : }
740 :
741 144588 : Known = std::move(KnownOut);
742 144588 : break;
743 : }
744 5959 : case ISD::SELECT:
745 11918 : if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
746 : Depth + 1))
747 : return true;
748 11858 : if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
749 : Depth + 1))
750 : return true;
751 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
752 : assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
753 :
754 : // If the operands are constants, see if we can simplify them.
755 5887 : if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
756 : return true;
757 :
758 : // Only known if known in both the LHS and RHS.
759 5887 : Known.One &= Known2.One;
760 5887 : Known.Zero &= Known2.Zero;
761 : break;
762 3919 : case ISD::SELECT_CC:
763 7838 : if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
764 : Depth + 1))
765 : return true;
766 7838 : if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
767 : Depth + 1))
768 : return true;
769 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
770 : assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
771 :
772 : // If the operands are constants, see if we can simplify them.
773 3919 : if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
774 : return true;
775 :
776 : // Only known if known in both the LHS and RHS.
777 3919 : Known.One &= Known2.One;
778 3919 : Known.Zero &= Known2.Zero;
779 : break;
780 22383 : case ISD::SETCC: {
781 22383 : SDValue Op0 = Op.getOperand(0);
782 22383 : SDValue Op1 = Op.getOperand(1);
783 22383 : ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
784 : // If (1) we only need the sign-bit, (2) the setcc operands are the same
785 : // width as the setcc result, and (3) the result of a setcc conforms to 0 or
786 : // -1, we may be able to bypass the setcc.
787 12265 : if (DemandedBits.isSignMask() &&
788 22383 : Op0.getScalarValueSizeInBits() == BitWidth &&
789 1279 : getBooleanContents(VT) ==
790 : BooleanContent::ZeroOrNegativeOneBooleanContent) {
791 : // If we're testing X < 0, then this compare isn't needed - just use X!
792 : // FIXME: We're limiting to integer types here, but this should also work
793 : // if we don't care about FP signed-zero. The use of SETLT with FP means
794 : // that we don't care about NaNs.
795 1449 : if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
796 174 : (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
797 87 : return TLO.CombineTo(Op, Op0);
798 :
799 : // TODO: Should we check for other forms of sign-bit comparisons?
800 : // Examples: X <= -1, X >= 0
801 : }
802 22296 : if (getBooleanContents(Op0.getValueType()) ==
803 22296 : TargetLowering::ZeroOrOneBooleanContent &&
804 : BitWidth > 1)
805 7639 : Known.Zero.setBitsFrom(1);
806 22296 : break;
807 : }
808 235916 : case ISD::SHL: {
809 235916 : SDValue Op0 = Op.getOperand(0);
810 235916 : SDValue Op1 = Op.getOperand(1);
811 :
812 235916 : if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
813 : // If the shift count is an invalid immediate, don't do anything.
814 675495 : if (SA->getAPIntValue().uge(BitWidth))
815 : break;
816 :
817 225149 : unsigned ShAmt = SA->getZExtValue();
818 :
819 : // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
820 : // single shift. We can do this if the bottom bits (which are shifted
821 : // out) are never demanded.
822 225149 : if (Op0.getOpcode() == ISD::SRL) {
823 9609 : if (ShAmt &&
824 46487 : (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
825 1558 : if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
826 3116 : if (SA2->getAPIntValue().ult(BitWidth)) {
827 1558 : unsigned C1 = SA2->getZExtValue();
828 : unsigned Opc = ISD::SHL;
829 1558 : int Diff = ShAmt - C1;
830 1558 : if (Diff < 0) {
831 87 : Diff = -Diff;
832 : Opc = ISD::SRL;
833 : }
834 :
835 3116 : SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
836 1558 : return TLO.CombineTo(
837 1558 : Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
838 : }
839 : }
840 : }
841 : }
842 :
843 447182 : if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), Known, TLO,
844 : Depth + 1))
845 : return true;
846 :
847 : // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
848 : // are not demanded. This will likely allow the anyext to be folded away.
849 210354 : if (Op0.getOpcode() == ISD::ANY_EXTEND) {
850 11635 : SDValue InnerOp = Op0.getOperand(0);
851 11635 : EVT InnerVT = InnerOp.getValueType();
852 : unsigned InnerBits = InnerVT.getScalarSizeInBits();
853 11669 : if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
854 34 : isTypeDesirableForOp(ISD::SHL, InnerVT)) {
855 18 : EVT ShTy = getShiftAmountTy(InnerVT, DL);
856 18 : if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
857 0 : ShTy = InnerVT;
858 : SDValue NarrowShl =
859 18 : TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
860 18 : TLO.DAG.getConstant(ShAmt, dl, ShTy));
861 18 : return TLO.CombineTo(
862 18 : Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
863 : }
864 : // Repeat the SHL optimization above in cases where an extension
865 : // intervenes: (shl (anyext (shr x, c1)), c2) to
866 : // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
867 : // aren't demanded (as above) and that the shifted upper c1 bits of
868 : // x aren't demanded.
869 11970 : if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
870 : InnerOp.hasOneUse()) {
871 353 : if (ConstantSDNode *SA2 =
872 353 : isConstOrConstSplat(InnerOp.getOperand(1))) {
873 333 : unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
874 331 : if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
875 : DemandedBits.getActiveBits() <=
876 333 : (InnerBits - InnerShAmt + ShAmt) &&
877 8 : DemandedBits.countTrailingZeros() >= ShAmt) {
878 2 : SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
879 2 : Op1.getValueType());
880 2 : SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
881 2 : InnerOp.getOperand(0));
882 2 : return TLO.CombineTo(
883 2 : Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
884 : }
885 : }
886 : }
887 : }
888 :
889 210334 : Known.Zero <<= ShAmt;
890 210334 : Known.One <<= ShAmt;
891 : // low bits known zero.
892 : Known.Zero.setLowBits(ShAmt);
893 : }
894 : break;
895 : }
896 279974 : case ISD::SRL: {
897 279974 : SDValue Op0 = Op.getOperand(0);
898 279974 : SDValue Op1 = Op.getOperand(1);
899 :
900 279974 : if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
901 : // If the shift count is an invalid immediate, don't do anything.
902 815403 : if (SA->getAPIntValue().uge(BitWidth))
903 : break;
904 :
905 271793 : unsigned ShAmt = SA->getZExtValue();
906 : APInt InDemandedMask = (DemandedBits << ShAmt);
907 :
908 : // If the shift is exact, then it does demand the low bits (and knows that
909 : // they are zero).
910 543586 : if (Op->getFlags().hasExact())
911 : InDemandedMask.setLowBits(ShAmt);
912 :
913 : // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
914 : // single shift. We can do this if the top bits (which are shifted out)
915 : // are never demanded.
916 271793 : if (Op0.getOpcode() == ISD::SHL) {
917 2123 : if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
918 1563 : if (ShAmt &&
919 6542 : (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
920 2546 : if (SA2->getAPIntValue().ult(BitWidth)) {
921 1273 : unsigned C1 = SA2->getZExtValue();
922 : unsigned Opc = ISD::SRL;
923 1273 : int Diff = ShAmt - C1;
924 1273 : if (Diff < 0) {
925 6 : Diff = -Diff;
926 : Opc = ISD::SHL;
927 : }
928 :
929 2546 : SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
930 1273 : return TLO.CombineTo(
931 1273 : Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
932 : }
933 : }
934 : }
935 : }
936 :
937 : // Compute the new bits that are at the top now.
938 270520 : if (SimplifyDemandedBits(Op0, InDemandedMask, Known, TLO, Depth + 1))
939 : return true;
940 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
941 258697 : Known.Zero.lshrInPlace(ShAmt);
942 258697 : Known.One.lshrInPlace(ShAmt);
943 :
944 : Known.Zero.setHighBits(ShAmt); // High bits known zero.
945 : }
946 : break;
947 : }
948 40302 : case ISD::SRA: {
949 40302 : SDValue Op0 = Op.getOperand(0);
950 40302 : SDValue Op1 = Op.getOperand(1);
951 :
952 : // If this is an arithmetic shift right and only the low-bit is set, we can
953 : // always convert this into a logical shr, even if the shift amount is
954 : // variable. The low bit of the shift cannot be an input sign bit unless
955 : // the shift amount is >= the size of the datatype, which is undefined.
956 40302 : if (DemandedBits.isOneValue())
957 66 : return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
958 :
959 40269 : if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
960 : // If the shift count is an invalid immediate, don't do anything.
961 118377 : if (SA->getAPIntValue().uge(BitWidth))
962 : break;
963 :
964 39459 : unsigned ShAmt = SA->getZExtValue();
965 : APInt InDemandedMask = (DemandedBits << ShAmt);
966 :
967 : // If the shift is exact, then it does demand the low bits (and knows that
968 : // they are zero).
969 78918 : if (Op->getFlags().hasExact())
970 : InDemandedMask.setLowBits(ShAmt);
971 :
972 : // If any of the demanded bits are produced by the sign extension, we also
973 : // demand the input sign bit.
974 39459 : if (DemandedBits.countLeadingZeros() < ShAmt)
975 36509 : InDemandedMask.setSignBit();
976 :
977 39459 : if (SimplifyDemandedBits(Op0, InDemandedMask, Known, TLO, Depth + 1))
978 : return true;
979 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
980 73774 : Known.Zero.lshrInPlace(ShAmt);
981 36887 : Known.One.lshrInPlace(ShAmt);
982 :
983 : // If the input sign bit is known to be zero, or if none of the top bits
984 : // are demanded, turn this into an unsigned shift right.
985 73774 : if (Known.Zero[BitWidth - ShAmt - 1] ||
986 36874 : DemandedBits.countLeadingZeros() >= ShAmt) {
987 : SDNodeFlags Flags;
988 5774 : Flags.setExact(Op->getFlags().hasExact());
989 2887 : return TLO.CombineTo(
990 2887 : Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
991 : }
992 :
993 34000 : int Log2 = DemandedBits.exactLogBase2();
994 34000 : if (Log2 >= 0) {
995 : // The bit must come from the sign.
996 : SDValue NewSA =
997 314 : TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
998 314 : return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
999 : }
1000 :
1001 33843 : if (Known.One[BitWidth - ShAmt - 1])
1002 : // New bits are known one.
1003 : Known.One.setHighBits(ShAmt);
1004 : }
1005 : break;
1006 : }
1007 59003 : case ISD::SIGN_EXTEND_INREG: {
1008 59003 : SDValue Op0 = Op.getOperand(0);
1009 59003 : EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1010 : unsigned ExVTBits = ExVT.getScalarSizeInBits();
1011 :
1012 : // If we only care about the highest bit, don't bother shifting right.
1013 59003 : if (DemandedBits.isSignMask()) {
1014 : bool AlreadySignExtended =
1015 245 : TLO.DAG.ComputeNumSignBits(Op0) >= BitWidth - ExVTBits + 1;
1016 : // However if the input is already sign extended we expect the sign
1017 : // extension to be dropped altogether later and do not simplify.
1018 245 : if (!AlreadySignExtended) {
1019 : // Compute the correct shift amount type, which must be getShiftAmountTy
1020 : // for scalar types after legalization.
1021 213 : EVT ShiftAmtTy = VT;
1022 421 : if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1023 18 : ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1024 :
1025 : SDValue ShiftAmt =
1026 213 : TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1027 213 : return TLO.CombineTo(Op,
1028 213 : TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1029 : }
1030 : }
1031 :
1032 : // If none of the extended bits are demanded, eliminate the sextinreg.
1033 58790 : if (DemandedBits.getActiveBits() <= ExVTBits)
1034 404 : return TLO.CombineTo(Op, Op0);
1035 :
1036 58386 : APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1037 :
1038 : // Since the sign extended bits are demanded, we know that the sign
1039 : // bit is demanded.
1040 58386 : InputDemandedBits.setBit(ExVTBits - 1);
1041 :
1042 58386 : if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1043 : return true;
1044 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1045 :
1046 : // If the sign bit of the input is known set or clear, then we know the
1047 : // top bits of the result.
1048 :
1049 : // If the input sign bit is known zero, convert this into a zero extension.
1050 56236 : if (Known.Zero[ExVTBits - 1])
1051 0 : return TLO.CombineTo(
1052 0 : Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
1053 :
1054 56236 : APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1055 56236 : if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1056 2 : Known.One.setBitsFrom(ExVTBits);
1057 2 : Known.Zero &= Mask;
1058 : } else { // Input sign bit unknown
1059 56234 : Known.Zero &= Mask;
1060 56234 : Known.One &= Mask;
1061 : }
1062 : break;
1063 : }
1064 14614 : case ISD::BUILD_PAIR: {
1065 29228 : EVT HalfVT = Op.getOperand(0).getValueType();
1066 : unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
1067 :
1068 14614 : APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1069 29228 : APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1070 :
1071 : KnownBits KnownLo, KnownHi;
1072 :
1073 29228 : if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1074 1622 : return true;
1075 :
1076 28758 : if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1077 : return true;
1078 :
1079 25984 : Known.Zero = KnownLo.Zero.zext(BitWidth) |
1080 25984 : KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1081 :
1082 25984 : Known.One = KnownLo.One.zext(BitWidth) |
1083 25984 : KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1084 12992 : break;
1085 : }
1086 42681 : case ISD::ZERO_EXTEND: {
1087 42681 : unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
1088 :
1089 : // If none of the top bits are demanded, convert this into an any_extend.
1090 42681 : if (DemandedBits.getActiveBits() <= OperandBitWidth)
1091 7826 : return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1092 3913 : Op.getOperand(0)));
1093 :
1094 38768 : APInt InMask = DemandedBits.trunc(OperandBitWidth);
1095 77536 : if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
1096 : return true;
1097 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1098 37742 : Known = Known.zext(BitWidth);
1099 37742 : Known.Zero.setBitsFrom(OperandBitWidth);
1100 : break;
1101 : }
1102 17932 : case ISD::SIGN_EXTEND: {
1103 17932 : SDValue Src = Op.getOperand(0);
1104 17932 : unsigned InBits = Src.getScalarValueSizeInBits();
1105 :
1106 : // If none of the top bits are demanded, convert this into an any_extend.
1107 17932 : if (DemandedBits.getActiveBits() <= InBits)
1108 1674 : return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
1109 :
1110 : // Since some of the sign extended bits are demanded, we know that the sign
1111 : // bit is demanded.
1112 17095 : APInt InDemandedBits = DemandedBits.trunc(InBits);
1113 17095 : InDemandedBits.setBit(InBits - 1);
1114 :
1115 17095 : if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
1116 : return true;
1117 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1118 : // If the sign bit is known one, the top bits match.
1119 17031 : Known = Known.sext(BitWidth);
1120 :
1121 : // If the sign bit is known zero, convert this to a zero extend.
1122 17031 : if (Known.isNonNegative())
1123 4728 : return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
1124 : break;
1125 : }
1126 444 : case ISD::SIGN_EXTEND_VECTOR_INREG: {
1127 : // TODO - merge this with SIGN_EXTEND above?
1128 444 : SDValue Src = Op.getOperand(0);
1129 444 : unsigned InBits = Src.getScalarValueSizeInBits();
1130 :
1131 444 : APInt InDemandedBits = DemandedBits.trunc(InBits);
1132 :
1133 : // If some of the sign extended bits are demanded, we know that the sign
1134 : // bit is demanded.
1135 444 : if (InBits < DemandedBits.getActiveBits())
1136 426 : InDemandedBits.setBit(InBits - 1);
1137 :
1138 444 : if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
1139 : return true;
1140 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1141 : // If the sign bit is known one, the top bits match.
1142 442 : Known = Known.sext(BitWidth);
1143 : break;
1144 : }
1145 109539 : case ISD::ANY_EXTEND: {
1146 109539 : unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
1147 109539 : APInt InMask = DemandedBits.trunc(OperandBitWidth);
1148 219078 : if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
1149 : return true;
1150 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1151 97799 : Known = Known.zext(BitWidth);
1152 : break;
1153 : }
1154 900015 : case ISD::TRUNCATE: {
1155 900015 : SDValue Src = Op.getOperand(0);
1156 :
1157 : // Simplify the input, using demanded bit information, and compute the known
1158 : // zero/one bits live out.
1159 900015 : unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1160 900015 : APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1161 900015 : if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
1162 : return true;
1163 885807 : Known = Known.trunc(BitWidth);
1164 :
1165 : // If the input is only used by this truncate, see if we can shrink it based
1166 : // on the known demanded bits.
1167 885807 : if (Src.getNode()->hasOneUse()) {
1168 : switch (Src.getOpcode()) {
1169 : default:
1170 785876 : break;
1171 43099 : case ISD::SRL:
1172 : // Shrink SRL by a constant if none of the high bits shifted in are
1173 : // demanded.
1174 43099 : if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
1175 : // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
1176 : // undesirable.
1177 : break;
1178 33838 : ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1179 : if (!ShAmt)
1180 : break;
1181 33652 : SDValue Shift = Src.getOperand(1);
1182 33652 : if (TLO.LegalTypes()) {
1183 30605 : uint64_t ShVal = ShAmt->getZExtValue();
1184 30605 : Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
1185 : }
1186 :
1187 67304 : if (ShAmt->getZExtValue() < BitWidth) {
1188 : APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
1189 3239 : OperandBitWidth - BitWidth);
1190 6478 : HighBits.lshrInPlace(ShAmt->getZExtValue());
1191 6478 : HighBits = HighBits.trunc(BitWidth);
1192 :
1193 3239 : if (!(HighBits & DemandedBits)) {
1194 : // None of the shifted in bits are needed. Add a truncate of the
1195 : // shift input, then shift it.
1196 : SDValue NewTrunc =
1197 3474 : TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
1198 1158 : return TLO.CombineTo(
1199 1158 : Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
1200 : }
1201 : }
1202 : break;
1203 : }
1204 : }
1205 :
1206 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1207 : break;
1208 : }
1209 55664 : case ISD::AssertZext: {
1210 : // AssertZext demands all of the high bits, plus any of the low bits
1211 : // demanded by its users.
1212 55664 : EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1213 55664 : APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1214 222656 : if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits,
1215 : Known, TLO, Depth+1))
1216 : return true;
1217 : assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1218 :
1219 111328 : Known.Zero |= ~InMask;
1220 : break;
1221 : }
1222 131125 : case ISD::BITCAST: {
1223 131125 : SDValue Src = Op.getOperand(0);
1224 131125 : EVT SrcVT = Src.getValueType();
1225 131125 : unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
1226 :
1227 : // If this is an FP->Int bitcast and if the sign bit is the only
1228 : // thing demanded, turn this into a FGETSIGN.
1229 19600 : if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
1230 146796 : DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
1231 435 : SrcVT.isFloatingPoint()) {
1232 : bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
1233 : bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
1234 435 : if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
1235 : SrcVT != MVT::f128) {
1236 : // Cannot eliminate/lower SHL for f128 yet.
1237 5 : EVT Ty = OpVTLegal ? VT : MVT::i32;
1238 : // Make a FGETSIGN + SHL to move the sign bit into the appropriate
1239 : // place. We expect the SHL to be eliminated by other optimizations.
1240 10 : SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
1241 5 : unsigned OpVTSizeInBits = Op.getValueSizeInBits();
1242 5 : if (!OpVTLegal && OpVTSizeInBits > 32)
1243 0 : Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
1244 5 : unsigned ShVal = Op.getValueSizeInBits() - 1;
1245 5 : SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
1246 5 : return TLO.CombineTo(Op,
1247 5 : TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
1248 : }
1249 : }
1250 : // If bitcast from a vector and the mask covers entire elements, see if we
1251 : // can use SimplifyDemandedVectorElts.
1252 : // TODO - bigendian once we have test coverage.
1253 : // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
1254 125270 : if (SrcVT.isVector() && NumSrcEltBits > 1 &&
1255 252659 : (BitWidth % NumSrcEltBits) == 0 &&
1256 107977 : TLO.DAG.getDataLayout().isLittleEndian()) {
1257 107039 : unsigned Scale = BitWidth / NumSrcEltBits;
1258 : auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool {
1259 : DemandedSubElts = APInt::getNullValue(Scale);
1260 : for (unsigned i = 0; i != Scale; ++i) {
1261 : unsigned Offset = i * NumSrcEltBits;
1262 : APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
1263 : if (Sub.isAllOnesValue())
1264 : DemandedSubElts.setBit(i);
1265 : else if (!Sub.isNullValue())
1266 : return false;
1267 : }
1268 : return true;
1269 107039 : };
1270 :
1271 : APInt DemandedSubElts;
1272 107039 : if (GetDemandedSubMask(DemandedSubElts)) {
1273 : unsigned NumSrcElts = SrcVT.getVectorNumElements();
1274 97154 : APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts);
1275 :
1276 : APInt KnownUndef, KnownZero;
1277 97154 : if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
1278 : TLO, Depth + 1))
1279 : return true;
1280 : }
1281 : }
1282 : // If this is a bitcast, let computeKnownBits handle it. Only do this on a
1283 : // recursive call where Known may be useful to the caller.
1284 129730 : if (Depth > 0) {
1285 123486 : TLO.DAG.computeKnownBits(Op, Known, Depth);
1286 123486 : return false;
1287 : }
1288 6244 : break;
1289 : }
1290 3275915 : case ISD::ADD:
1291 : case ISD::MUL:
1292 : case ISD::SUB: {
1293 : // Add, Sub, and Mul don't demand any bits in positions beyond that
1294 : // of the highest bit demanded of them.
1295 3275915 : SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
1296 3275915 : unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
1297 3275915 : APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
1298 6545063 : if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
1299 6539371 : SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
1300 : // See if the operation should be performed at a smaller bit width.
1301 3263456 : ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
1302 13771 : SDNodeFlags Flags = Op.getNode()->getFlags();
1303 13771 : if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1304 : // Disable the nsw and nuw flags. We can no longer guarantee that we
1305 : // won't wrap after simplification.
1306 : Flags.setNoSignedWrap(false);
1307 : Flags.setNoUnsignedWrap(false);
1308 1073 : SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
1309 1073 : Flags);
1310 1073 : return TLO.CombineTo(Op, NewOp);
1311 : }
1312 : return true;
1313 : }
1314 :
1315 : // If we have a constant operand, we may be able to turn it into -1 if we
1316 : // do not demand the high bits. This can make the constant smaller to
1317 : // encode, allow more general folding, or match specialized instruction
1318 : // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
1319 : // is probably not useful (and could be detrimental).
1320 3262144 : ConstantSDNode *C = isConstOrConstSplat(Op1);
1321 3262144 : APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
1322 10923323 : if (C && !C->isAllOnesValue() && !C->isOne() &&
1323 7587472 : (C->getAPIntValue() | HighMask).isAllOnesValue()) {
1324 21 : SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
1325 : // We can't guarantee that the new math op doesn't wrap, so explicitly
1326 : // clear those flags to prevent folding with a potential existing node
1327 : // that has those flags set.
1328 : SDNodeFlags Flags;
1329 : Flags.setNoSignedWrap(false);
1330 : Flags.setNoUnsignedWrap(false);
1331 42 : SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
1332 21 : return TLO.CombineTo(Op, NewOp);
1333 : }
1334 :
1335 : LLVM_FALLTHROUGH;
1336 : }
1337 : default:
1338 : // Just use computeKnownBits to compute output bits.
1339 5042834 : TLO.DAG.computeKnownBits(Op, Known, Depth);
1340 5042834 : break;
1341 : }
1342 :
1343 : // If we know the value of all of the demanded bits, return this as a
1344 : // constant.
1345 14917708 : if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
1346 : // Avoid folding to a constant if any OpaqueConstant is involved.
1347 8608 : const SDNode *N = Op.getNode();
1348 : for (SDNodeIterator I = SDNodeIterator::begin(N),
1349 : E = SDNodeIterator::end(N);
1350 24170 : I != E; ++I) {
1351 : SDNode *Op = *I;
1352 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
1353 8975 : if (C->isOpaque())
1354 : return false;
1355 : }
1356 8584 : return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
1357 : }
1358 :
1359 : return false;
1360 : }
1361 :
1362 76309 : bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
1363 : const APInt &DemandedElts,
1364 : APInt &KnownUndef,
1365 : APInt &KnownZero,
1366 : DAGCombinerInfo &DCI) const {
1367 76309 : SelectionDAG &DAG = DCI.DAG;
1368 76309 : TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
1369 152618 : !DCI.isBeforeLegalizeOps());
1370 :
1371 : bool Simplified =
1372 76309 : SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
1373 76309 : if (Simplified) {
1374 69 : DCI.AddToWorklist(Op.getNode());
1375 69 : DCI.CommitTargetLoweringOpt(TLO);
1376 : }
1377 76309 : return Simplified;
1378 : }
1379 :
1380 1556390 : bool TargetLowering::SimplifyDemandedVectorElts(
1381 : SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
1382 : APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
1383 : bool AssumeSingleUse) const {
1384 3112780 : EVT VT = Op.getValueType();
1385 : APInt DemandedElts = DemandedEltMask;
1386 1556390 : unsigned NumElts = DemandedElts.getBitWidth();
1387 : assert(VT.isVector() && "Expected vector op");
1388 : assert(VT.getVectorNumElements() == NumElts &&
1389 : "Mask size mismatches value type element count!");
1390 :
1391 3112780 : KnownUndef = KnownZero = APInt::getNullValue(NumElts);
1392 :
1393 : // Undef operand.
1394 3112780 : if (Op.isUndef()) {
1395 89273 : KnownUndef.setAllBits();
1396 89273 : return false;
1397 : }
1398 :
1399 : // If Op has other users, assume that all elements are needed.
1400 550728 : if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
1401 322343 : DemandedElts.setAllBits();
1402 :
1403 : // Not demanding any elements from Op.
1404 1467117 : if (DemandedElts == 0) {
1405 1055 : KnownUndef.setAllBits();
1406 1055 : return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1407 : }
1408 :
1409 : // Limit search depth.
1410 1466062 : if (Depth >= 6)
1411 : return false;
1412 :
1413 : SDLoc DL(Op);
1414 : unsigned EltSizeInBits = VT.getScalarSizeInBits();
1415 :
1416 2836202 : switch (Op.getOpcode()) {
1417 : case ISD::SCALAR_TO_VECTOR: {
1418 35853 : if (!DemandedElts[0]) {
1419 0 : KnownUndef.setAllBits();
1420 0 : return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1421 : }
1422 35853 : KnownUndef.setHighBits(NumElts - 1);
1423 : break;
1424 : }
1425 285947 : case ISD::BITCAST: {
1426 285947 : SDValue Src = Op.getOperand(0);
1427 285947 : EVT SrcVT = Src.getValueType();
1428 :
1429 : // We only handle vectors here.
1430 : // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
1431 285947 : if (!SrcVT.isVector())
1432 : break;
1433 :
1434 : // Fast handling of 'identity' bitcasts.
1435 : unsigned NumSrcElts = SrcVT.getVectorNumElements();
1436 251379 : if (NumSrcElts == NumElts)
1437 22943 : return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
1438 23855 : KnownZero, TLO, Depth + 1);
1439 :
1440 : APInt SrcZero, SrcUndef;
1441 228436 : APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
1442 :
1443 : // Bitcast from 'large element' src vector to 'small element' vector, we
1444 : // must demand a source element if any DemandedElt maps to it.
1445 228436 : if ((NumElts % NumSrcElts) == 0) {
1446 145604 : unsigned Scale = NumElts / NumSrcElts;
1447 1847500 : for (unsigned i = 0; i != NumElts; ++i)
1448 1701896 : if (DemandedElts[i])
1449 1177854 : SrcDemandedElts.setBit(i / Scale);
1450 :
1451 145604 : if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
1452 : TLO, Depth + 1))
1453 : return true;
1454 :
1455 : // If the src element is zero/undef then all the output elements will be -
1456 : // only demanded elements are guaranteed to be correct.
1457 672639 : for (unsigned i = 0; i != NumSrcElts; ++i) {
1458 527607 : if (SrcDemandedElts[i]) {
1459 424759 : if (SrcZero[i])
1460 2726 : KnownZero.setBits(i * Scale, (i + 1) * Scale);
1461 424759 : if (SrcUndef[i])
1462 12932 : KnownUndef.setBits(i * Scale, (i + 1) * Scale);
1463 : }
1464 : }
1465 : }
1466 :
1467 : // Bitcast from 'small element' src vector to 'large element' vector, we
1468 : // demand all smaller source elements covered by the larger demanded element
1469 : // of this vector.
1470 227864 : if ((NumSrcElts % NumElts) == 0) {
1471 82832 : unsigned Scale = NumSrcElts / NumElts;
1472 365388 : for (unsigned i = 0; i != NumElts; ++i)
1473 282556 : if (DemandedElts[i])
1474 199109 : SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
1475 :
1476 82832 : if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
1477 : TLO, Depth + 1))
1478 : return true;
1479 :
1480 : // If all the src elements covering an output element are zero/undef, then
1481 : // the output element will be as well, assuming it was demanded.
1482 363855 : for (unsigned i = 0; i != NumElts; ++i) {
1483 281363 : if (DemandedElts[i]) {
1484 396934 : if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
1485 : KnownZero.setBit(i);
1486 396934 : if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
1487 : KnownUndef.setBit(i);
1488 : }
1489 : }
1490 : }
1491 : break;
1492 : }
1493 : case ISD::BUILD_VECTOR: {
1494 : // Check all elements and simplify any unused elements with UNDEF.
1495 54702 : if (!DemandedElts.isAllOnesValue()) {
1496 : // Don't simplify BROADCASTS.
1497 15681 : if (llvm::any_of(Op->op_values(),
1498 0 : [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
1499 10260 : SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
1500 : bool Updated = false;
1501 64045 : for (unsigned i = 0; i != NumElts; ++i) {
1502 53785 : if (!DemandedElts[i] && !Ops[i].isUndef()) {
1503 5412 : Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
1504 : KnownUndef.setBit(i);
1505 : Updated = true;
1506 : }
1507 : }
1508 10260 : if (Updated)
1509 3622 : return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
1510 : }
1511 : }
1512 549206 : for (unsigned i = 0; i != NumElts; ++i) {
1513 496315 : SDValue SrcOp = Op.getOperand(i);
1514 992630 : if (SrcOp.isUndef()) {
1515 : KnownUndef.setBit(i);
1516 905059 : } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
1517 723737 : (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
1518 : KnownZero.setBit(i);
1519 : }
1520 : }
1521 : break;
1522 : }
1523 20221 : case ISD::CONCAT_VECTORS: {
1524 40442 : EVT SubVT = Op.getOperand(0).getValueType();
1525 : unsigned NumSubVecs = Op.getNumOperands();
1526 : unsigned NumSubElts = SubVT.getVectorNumElements();
1527 78287 : for (unsigned i = 0; i != NumSubVecs; ++i) {
1528 58711 : SDValue SubOp = Op.getOperand(i);
1529 58711 : APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1530 : APInt SubUndef, SubZero;
1531 58711 : if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
1532 : Depth + 1))
1533 : return true;
1534 58066 : KnownUndef.insertBits(SubUndef, i * NumSubElts);
1535 58066 : KnownZero.insertBits(SubZero, i * NumSubElts);
1536 : }
1537 19576 : break;
1538 : }
1539 9683 : case ISD::INSERT_SUBVECTOR: {
1540 : if (!isa<ConstantSDNode>(Op.getOperand(2)))
1541 : break;
1542 9683 : SDValue Base = Op.getOperand(0);
1543 9683 : SDValue Sub = Op.getOperand(1);
1544 9683 : EVT SubVT = Sub.getValueType();
1545 : unsigned NumSubElts = SubVT.getVectorNumElements();
1546 9683 : const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
1547 19366 : if (Idx.uge(NumElts - NumSubElts))
1548 : break;
1549 7236 : unsigned SubIdx = Idx.getZExtValue();
1550 7236 : APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
1551 : APInt SubUndef, SubZero;
1552 7236 : if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
1553 : Depth + 1))
1554 : return true;
1555 : APInt BaseElts = DemandedElts;
1556 7235 : BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
1557 7235 : if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
1558 : Depth + 1))
1559 : return true;
1560 7235 : KnownUndef.insertBits(SubUndef, SubIdx);
1561 7235 : KnownZero.insertBits(SubZero, SubIdx);
1562 : break;
1563 : }
1564 122700 : case ISD::EXTRACT_SUBVECTOR: {
1565 122700 : SDValue Src = Op.getOperand(0);
1566 : ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1567 122700 : unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1568 122700 : if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
1569 : // Offset the demanded elts by the subvector index.
1570 : uint64_t Idx = SubIdx->getZExtValue();
1571 245400 : APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
1572 : APInt SrcUndef, SrcZero;
1573 122700 : if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
1574 : Depth + 1))
1575 : return true;
1576 122409 : KnownUndef = SrcUndef.extractBits(NumElts, Idx);
1577 244818 : KnownZero = SrcZero.extractBits(NumElts, Idx);
1578 : }
1579 122409 : break;
1580 : }
1581 15798 : case ISD::INSERT_VECTOR_ELT: {
1582 15798 : SDValue Vec = Op.getOperand(0);
1583 15798 : SDValue Scl = Op.getOperand(1);
1584 : auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1585 :
1586 : // For a legal, constant insertion index, if we don't need this insertion
1587 : // then strip it, else remove it from the demanded elts.
1588 29062 : if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
1589 14531 : unsigned Idx = CIdx->getZExtValue();
1590 14531 : if (!DemandedElts[Idx])
1591 356 : return TLO.CombineTo(Op, Vec);
1592 : DemandedElts.clearBit(Idx);
1593 :
1594 14445 : if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
1595 : KnownZero, TLO, Depth + 1))
1596 : return true;
1597 :
1598 : KnownUndef.clearBit(Idx);
1599 14183 : if (Scl.isUndef())
1600 : KnownUndef.setBit(Idx);
1601 :
1602 : KnownZero.clearBit(Idx);
1603 14183 : if (isNullConstant(Scl) || isNullFPConstant(Scl))
1604 : KnownZero.setBit(Idx);
1605 15442 : break;
1606 : }
1607 :
1608 : APInt VecUndef, VecZero;
1609 1267 : if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
1610 : Depth + 1))
1611 : return true;
1612 : // Without knowing the insertion index we can't set KnownUndef/KnownZero.
1613 : break;
1614 : }
1615 : case ISD::VSELECT: {
1616 : // Try to transform the select condition based on the current demanded
1617 : // elements.
1618 : // TODO: If a condition element is undef, we can choose from one arm of the
1619 : // select (and if one arm is undef, then we can propagate that to the
1620 : // result).
1621 : // TODO - add support for constant vselect masks (see IR version of this).
1622 : APInt UnusedUndef, UnusedZero;
1623 17762 : if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
1624 : UnusedZero, TLO, Depth + 1))
1625 : return true;
1626 :
1627 : // See if we can simplify either vselect operand.
1628 : APInt DemandedLHS(DemandedElts);
1629 : APInt DemandedRHS(DemandedElts);
1630 : APInt UndefLHS, ZeroLHS;
1631 : APInt UndefRHS, ZeroRHS;
1632 17690 : if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
1633 : ZeroLHS, TLO, Depth + 1))
1634 : return true;
1635 17666 : if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
1636 : ZeroRHS, TLO, Depth + 1))
1637 : return true;
1638 :
1639 8828 : KnownUndef = UndefLHS & UndefRHS;
1640 8828 : KnownZero = ZeroLHS & ZeroRHS;
1641 : break;
1642 : }
1643 : case ISD::VECTOR_SHUFFLE: {
1644 114358 : ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1645 :
1646 : // Collect demanded elements from shuffle operands..
1647 : APInt DemandedLHS(NumElts, 0);
1648 : APInt DemandedRHS(NumElts, 0);
1649 1920241 : for (unsigned i = 0; i != NumElts; ++i) {
1650 1805883 : int M = ShuffleMask[i];
1651 3006432 : if (M < 0 || !DemandedElts[i])
1652 : continue;
1653 : assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
1654 1188450 : if (M < (int)NumElts)
1655 937472 : DemandedLHS.setBit(M);
1656 : else
1657 250978 : DemandedRHS.setBit(M - NumElts);
1658 : }
1659 :
1660 : // See if we can simplify either shuffle operand.
1661 : APInt UndefLHS, ZeroLHS;
1662 : APInt UndefRHS, ZeroRHS;
1663 228716 : if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
1664 : ZeroLHS, TLO, Depth + 1))
1665 : return true;
1666 225150 : if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
1667 : ZeroRHS, TLO, Depth + 1))
1668 : return true;
1669 :
1670 : // Simplify mask using undef elements from LHS/RHS.
1671 : bool Updated = false;
1672 : bool IdentityLHS = true, IdentityRHS = true;
1673 111959 : SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
1674 1889746 : for (unsigned i = 0; i != NumElts; ++i) {
1675 1777787 : int &M = NewMask[i];
1676 1777787 : if (M < 0)
1677 : continue;
1678 2103367 : if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
1679 490924 : (M >= (int)NumElts && UndefRHS[M - NumElts])) {
1680 : Updated = true;
1681 13369 : M = -1;
1682 : }
1683 1179225 : IdentityLHS &= (M < 0) || (M == (int)i);
1684 1179225 : IdentityRHS &= (M < 0) || ((M - NumElts) == i);
1685 : }
1686 :
1687 : // Update legal shuffle masks based on demanded elements if it won't reduce
1688 : // to Identity which can cause premature removal of the shuffle mask.
1689 113068 : if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
1690 113254 : isShuffleMaskLegal(NewMask, VT))
1691 923 : return TLO.CombineTo(Op,
1692 923 : TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
1693 923 : Op.getOperand(1), NewMask));
1694 :
1695 : // Propagate undef/zero elements from LHS/RHS.
1696 1881229 : for (unsigned i = 0; i != NumElts; ++i) {
1697 1770193 : int M = ShuffleMask[i];
1698 1770193 : if (M < 0) {
1699 : KnownUndef.setBit(i);
1700 1172461 : } else if (M < (int)NumElts) {
1701 1851338 : if (UndefLHS[M])
1702 : KnownUndef.setBit(i);
1703 925669 : if (ZeroLHS[M])
1704 : KnownZero.setBit(i);
1705 : } else {
1706 493584 : if (UndefRHS[M - NumElts])
1707 : KnownUndef.setBit(i);
1708 246792 : if (ZeroRHS[M - NumElts])
1709 : KnownZero.setBit(i);
1710 : }
1711 : }
1712 : break;
1713 : }
1714 : case ISD::ADD:
1715 : case ISD::SUB:
1716 : case ISD::FADD:
1717 : case ISD::FSUB:
1718 : case ISD::FMUL:
1719 : case ISD::FDIV:
1720 : case ISD::FREM: {
1721 : APInt SrcUndef, SrcZero;
1722 87166 : if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
1723 : SrcZero, TLO, Depth + 1))
1724 : return true;
1725 86952 : if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
1726 : KnownZero, TLO, Depth + 1))
1727 : return true;
1728 : KnownZero &= SrcZero;
1729 : KnownUndef &= SrcUndef;
1730 : break;
1731 : }
1732 3078 : case ISD::TRUNCATE:
1733 6156 : if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
1734 : KnownZero, TLO, Depth + 1))
1735 7 : return true;
1736 : break;
1737 703297 : default: {
1738 703297 : if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
1739 191892 : if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
1740 191892 : KnownZero, TLO, Depth))
1741 338 : return true;
1742 : break;
1743 : }
1744 : }
1745 :
1746 : assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
1747 : return false;
1748 : }
1749 :
1750 : /// Determine which of the bits specified in Mask are known to be either zero or
1751 : /// one and return them in the Known.
1752 122936 : void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
1753 : KnownBits &Known,
1754 : const APInt &DemandedElts,
1755 : const SelectionDAG &DAG,
1756 : unsigned Depth) const {
1757 : assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1758 : Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1759 : Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1760 : Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1761 : "Should use MaskedValueIsZero if you don't know whether Op"
1762 : " is a target node!");
1763 : Known.resetAll();
1764 122936 : }
1765 :
1766 1396852 : void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
1767 : KnownBits &Known,
1768 : const APInt &DemandedElts,
1769 : const SelectionDAG &DAG,
1770 : unsigned Depth) const {
1771 : assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
1772 :
1773 1396852 : if (unsigned Align = DAG.InferPtrAlignment(Op)) {
1774 : // The low bits are known zero if the pointer is aligned.
1775 1396852 : Known.Zero.setLowBits(Log2_32(Align));
1776 : }
1777 1396852 : }
1778 :
1779 : /// This method can be implemented by targets that want to expose additional
1780 : /// information about sign bits to the DAG Combiner.
1781 3559 : unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
1782 : const APInt &,
1783 : const SelectionDAG &,
1784 : unsigned Depth) const {
1785 : assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1786 : Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1787 : Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1788 : Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1789 : "Should use ComputeNumSignBits if you don't know whether Op"
1790 : " is a target node!");
1791 3559 : return 1;
1792 : }
1793 :
1794 7580 : bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
1795 : SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
1796 : TargetLoweringOpt &TLO, unsigned Depth) const {
1797 : assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1798 : Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1799 : Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1800 : Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1801 : "Should use SimplifyDemandedVectorElts if you don't know whether Op"
1802 : " is a target node!");
1803 7580 : return false;
1804 : }
1805 :
1806 0 : bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
1807 : const SelectionDAG &DAG,
1808 : bool SNaN,
1809 : unsigned Depth) const {
1810 : assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1811 : Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1812 : Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1813 : Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1814 : "Should use isKnownNeverNaN if you don't know whether Op"
1815 : " is a target node!");
1816 0 : return false;
1817 : }
1818 :
1819 : // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
1820 : // work with truncating build vectors and vectors with elements of less than
1821 : // 8 bits.
1822 116384 : bool TargetLowering::isConstTrueVal(const SDNode *N) const {
1823 116384 : if (!N)
1824 : return false;
1825 :
1826 : APInt CVal;
1827 : if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
1828 158114 : CVal = CN->getAPIntValue();
1829 : } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
1830 8366 : auto *CN = BV->getConstantSplatNode();
1831 8366 : if (!CN)
1832 : return false;
1833 :
1834 : // If this is a truncating build vector, truncate the splat value.
1835 : // Otherwise, we may fail to match the expected values below.
1836 16226 : unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
1837 16226 : CVal = CN->getAPIntValue();
1838 8113 : if (BVEltWidth < CVal.getBitWidth())
1839 1032 : CVal = CVal.trunc(BVEltWidth);
1840 : } else {
1841 : return false;
1842 : }
1843 :
1844 174340 : switch (getBooleanContents(N->getValueType(0))) {
1845 : case UndefinedBooleanContent:
1846 726 : return CVal[0];
1847 : case ZeroOrOneBooleanContent:
1848 : return CVal.isOneValue();
1849 : case ZeroOrNegativeOneBooleanContent:
1850 : return CVal.isAllOnesValue();
1851 : }
1852 :
1853 0 : llvm_unreachable("Invalid boolean contents");
1854 : }
1855 :
1856 487 : bool TargetLowering::isConstFalseVal(const SDNode *N) const {
1857 487 : if (!N)
1858 : return false;
1859 :
1860 : const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
1861 : if (!CN) {
1862 : const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
1863 : if (!BV)
1864 : return false;
1865 :
1866 : // Only interested in constant splats, we don't care about undef
1867 : // elements in identifying boolean constants and getConstantSplatNode
1868 : // returns NULL if all ops are undef;
1869 0 : CN = BV->getConstantSplatNode();
1870 0 : if (!CN)
1871 : return false;
1872 : }
1873 :
1874 970 : if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
1875 0 : return !CN->getAPIntValue()[0];
1876 :
1877 485 : return CN->isNullValue();
1878 : }
1879 :
1880 8 : bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
1881 : bool SExt) const {
1882 : if (VT == MVT::i1)
1883 0 : return N->isOne();
1884 :
1885 8 : TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
1886 8 : switch (Cnt) {
1887 0 : case TargetLowering::ZeroOrOneBooleanContent:
1888 : // An extended value of 1 is always true, unless its original type is i1,
1889 : // in which case it will be sign extended to -1.
1890 0 : return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
1891 8 : case TargetLowering::UndefinedBooleanContent:
1892 : case TargetLowering::ZeroOrNegativeOneBooleanContent:
1893 16 : return N->isAllOnesValue() && SExt;
1894 : }
1895 0 : llvm_unreachable("Unexpected enumeration.");
1896 : }
1897 :
1898 : /// This helper function of SimplifySetCC tries to optimize the comparison when
1899 : /// either operand of the SetCC node is a bitwise-and instruction.
1900 261347 : SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
1901 : ISD::CondCode Cond,
1902 : DAGCombinerInfo &DCI,
1903 : const SDLoc &DL) const {
1904 : // Match these patterns in any of their permutations:
1905 : // (X & Y) == Y
1906 : // (X & Y) != Y
1907 261347 : if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
1908 : std::swap(N0, N1);
1909 :
1910 261347 : EVT OpVT = N0.getValueType();
1911 261347 : if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
1912 15617 : (Cond != ISD::SETEQ && Cond != ISD::SETNE))
1913 245730 : return SDValue();
1914 :
1915 15617 : SDValue X, Y;
1916 15617 : if (N0.getOperand(0) == N1) {
1917 177 : X = N0.getOperand(1);
1918 177 : Y = N0.getOperand(0);
1919 15440 : } else if (N0.getOperand(1) == N1) {
1920 387 : X = N0.getOperand(0);
1921 387 : Y = N0.getOperand(1);
1922 : } else {
1923 15053 : return SDValue();
1924 : }
1925 :
1926 564 : SelectionDAG &DAG = DCI.DAG;
1927 564 : SDValue Zero = DAG.getConstant(0, DL, OpVT);
1928 564 : if (DAG.isKnownToBeAPowerOfTwo(Y)) {
1929 : // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
1930 : // Note that where Y is variable and is known to have at most one bit set
1931 : // (for example, if it is Z & 1) we cannot do this; the expressions are not
1932 : // equivalent when Y == 0.
1933 190 : Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
1934 380 : if (DCI.isBeforeLegalizeOps() ||
1935 : isCondCodeLegal(Cond, N0.getSimpleValueType()))
1936 190 : return DAG.getSetCC(DL, VT, N0, Zero, Cond);
1937 374 : } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
1938 : // If the target supports an 'and-not' or 'and-complement' logic operation,
1939 : // try to use that to make a comparison operation more efficient.
1940 : // But don't do this transform if the mask is a single bit because there are
1941 : // more efficient ways to deal with that case (for example, 'bt' on x86 or
1942 : // 'rlwinm' on PPC).
1943 :
1944 : // Bail out if the compare operand that we want to turn into a zero is
1945 : // already a zero (otherwise, infinite loop).
1946 : auto *YConst = dyn_cast<ConstantSDNode>(Y);
1947 40 : if (YConst && YConst->isNullValue())
1948 0 : return SDValue();
1949 :
1950 : // Transform this into: ~X & Y == 0.
1951 156 : SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
1952 78 : SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
1953 78 : return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
1954 : }
1955 :
1956 296 : return SDValue();
1957 : }
1958 :
1959 : /// There are multiple IR patterns that could be checking whether certain
1960 : /// truncation of a signed number would be lossy or not. The pattern which is
1961 : /// best at IR level, may not lower optimally. Thus, we want to unfold it.
1962 : /// We are looking for the following pattern: (KeptBits is a constant)
1963 : /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
1964 : /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
1965 : /// KeptBits also can't be 1, that would have been folded to %x dstcond 0
1966 : /// We will unfold it into the natural trunc+sext pattern:
1967 : /// ((%x << C) a>> C) dstcond %x
1968 : /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
1969 230495 : SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
1970 : EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
1971 : const SDLoc &DL) const {
1972 : // We must be comparing with a constant.
1973 : ConstantSDNode *C1;
1974 : if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
1975 0 : return SDValue();
1976 :
1977 : // N0 should be: add %x, (1 << (KeptBits-1))
1978 230495 : if (N0->getOpcode() != ISD::ADD)
1979 211796 : return SDValue();
1980 :
1981 : // And we must be 'add'ing a constant.
1982 : ConstantSDNode *C01;
1983 18699 : if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
1984 665 : return SDValue();
1985 :
1986 18034 : SDValue X = N0->getOperand(0);
1987 18034 : EVT XVT = X.getValueType();
1988 :
1989 : // Validate constants ...
1990 :
1991 18034 : APInt I1 = C1->getAPIntValue();
1992 :
1993 : ISD::CondCode NewCond;
1994 18034 : if (Cond == ISD::CondCode::SETULT) {
1995 : NewCond = ISD::CondCode::SETEQ;
1996 9444 : } else if (Cond == ISD::CondCode::SETULE) {
1997 : NewCond = ISD::CondCode::SETEQ;
1998 : // But need to 'canonicalize' the constant.
1999 285 : I1 += 1;
2000 9159 : } else if (Cond == ISD::CondCode::SETUGT) {
2001 : NewCond = ISD::CondCode::SETNE;
2002 : // But need to 'canonicalize' the constant.
2003 3969 : I1 += 1;
2004 5190 : } else if (Cond == ISD::CondCode::SETUGE) {
2005 : NewCond = ISD::CondCode::SETNE;
2006 : } else
2007 4954 : return SDValue();
2008 :
2009 13080 : APInt I01 = C01->getAPIntValue();
2010 :
2011 : auto checkConstants = [&I1, &I01]() -> bool {
2012 : // Both of them must be power-of-two, and the constant from setcc is bigger.
2013 : return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
2014 : };
2015 :
2016 13080 : if (checkConstants()) {
2017 : // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
2018 : } else {
2019 : // What if we invert constants? (and the target predicate)
2020 : I1.negate();
2021 : I01.negate();
2022 12448 : NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
2023 12448 : if (!checkConstants())
2024 12364 : return SDValue();
2025 : // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
2026 : }
2027 :
2028 : // They are power-of-two, so which bit is set?
2029 : const unsigned KeptBits = I1.logBase2();
2030 : const unsigned KeptBitsMinusOne = I01.logBase2();
2031 :
2032 : // Magic!
2033 716 : if (KeptBits != (KeptBitsMinusOne + 1))
2034 416 : return SDValue();
2035 : assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
2036 :
2037 : // We don't want to do this in every single case.
2038 300 : SelectionDAG &DAG = DCI.DAG;
2039 300 : if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
2040 300 : XVT, KeptBits))
2041 128 : return SDValue();
2042 :
2043 172 : const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
2044 : assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
2045 :
2046 : // Unfold into: ((%x << C) a>> C) cond %x
2047 : // Where 'cond' will be either 'eq' or 'ne'.
2048 172 : SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
2049 172 : SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
2050 172 : SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
2051 172 : SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
2052 :
2053 172 : return T2;
2054 : }
2055 :
2056 : /// Try to simplify a setcc built with the specified operands and cc. If it is
2057 : /// unable to simplify it, return a null SDValue.
2058 397495 : SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
2059 : ISD::CondCode Cond, bool foldBooleans,
2060 : DAGCombinerInfo &DCI,
2061 : const SDLoc &dl) const {
2062 397495 : SelectionDAG &DAG = DCI.DAG;
2063 397495 : EVT OpVT = N0.getValueType();
2064 :
2065 : // These setcc operations always fold.
2066 397495 : switch (Cond) {
2067 : default: break;
2068 0 : case ISD::SETFALSE:
2069 0 : case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT);
2070 0 : case ISD::SETTRUE:
2071 0 : case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT);
2072 : }
2073 :
2074 : // Ensure that the constant occurs on the RHS and fold constant comparisons.
2075 : // TODO: Handle non-splat vector constants. All undef causes trouble.
2076 397495 : ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
2077 397495 : if (isConstOrConstSplat(N0) &&
2078 2034 : (DCI.isBeforeLegalizeOps() ||
2079 : isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
2080 845 : return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
2081 :
2082 : if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
2083 238947 : const APInt &C1 = N1C->getAPIntValue();
2084 :
2085 : // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
2086 : // equality comparison, then we're just comparing whether X itself is
2087 : // zero.
2088 239716 : if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
2089 239438 : N0.getOperand(0).getOpcode() == ISD::CTLZ &&
2090 0 : N0.getOperand(1).getOpcode() == ISD::Constant) {
2091 : const APInt &ShAmt
2092 0 : = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
2093 0 : if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2094 0 : ShAmt == Log2_32(N0.getValueSizeInBits())) {
2095 0 : if ((C1 == 0) == (Cond == ISD::SETEQ)) {
2096 : // (srl (ctlz x), 5) == 0 -> X != 0
2097 : // (srl (ctlz x), 5) != 1 -> X != 0
2098 : Cond = ISD::SETNE;
2099 : } else {
2100 : // (srl (ctlz x), 5) != 0 -> X == 0
2101 : // (srl (ctlz x), 5) == 1 -> X == 0
2102 : Cond = ISD::SETEQ;
2103 : }
2104 0 : SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
2105 0 : return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
2106 0 : Zero, Cond);
2107 : }
2108 : }
2109 :
2110 238947 : SDValue CTPOP = N0;
2111 : // Look through truncs that don't change the value of a ctpop.
2112 238947 : if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
2113 11709 : CTPOP = N0.getOperand(0);
2114 :
2115 238947 : if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
2116 10 : (N0 == CTPOP ||
2117 4 : N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
2118 8 : EVT CTVT = CTPOP.getValueType();
2119 4 : SDValue CTOp = CTPOP.getOperand(0);
2120 :
2121 : // (ctpop x) u< 2 -> (x & x-1) == 0
2122 : // (ctpop x) u> 1 -> (x & x-1) != 0
2123 4 : if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
2124 : SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
2125 4 : DAG.getConstant(1, dl, CTVT));
2126 4 : SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
2127 4 : ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
2128 4 : return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
2129 : }
2130 :
2131 : // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
2132 : }
2133 :
2134 : // (zext x) == C --> x == (trunc C)
2135 : // (sext x) == C --> x == (trunc C)
2136 423591 : if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2137 238943 : DCI.isBeforeLegalize() && N0->hasOneUse()) {
2138 63138 : unsigned MinBits = N0.getValueSizeInBits();
2139 63138 : SDValue PreExt;
2140 : bool Signed = false;
2141 126276 : if (N0->getOpcode() == ISD::ZERO_EXTEND) {
2142 : // ZExt
2143 1142 : MinBits = N0->getOperand(0).getValueSizeInBits();
2144 571 : PreExt = N0->getOperand(0);
2145 62567 : } else if (N0->getOpcode() == ISD::AND) {
2146 : // DAGCombine turns costly ZExts into ANDs
2147 7039 : if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
2148 11258 : if ((C->getAPIntValue()+1).isPowerOf2()) {
2149 4274 : MinBits = C->getAPIntValue().countTrailingOnes();
2150 4274 : PreExt = N0->getOperand(0);
2151 : }
2152 55528 : } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
2153 : // SExt
2154 296 : MinBits = N0->getOperand(0).getValueSizeInBits();
2155 148 : PreExt = N0->getOperand(0);
2156 : Signed = true;
2157 : } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
2158 : // ZEXTLOAD / SEXTLOAD
2159 20064 : if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
2160 0 : MinBits = LN0->getMemoryVT().getSizeInBits();
2161 0 : PreExt = N0;
2162 20064 : } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
2163 : Signed = true;
2164 97 : MinBits = LN0->getMemoryVT().getSizeInBits();
2165 97 : PreExt = N0;
2166 : }
2167 : }
2168 :
2169 : // Figure out how many bits we need to preserve this constant.
2170 245 : unsigned ReqdBits = Signed ?
2171 245 : C1.getBitWidth() - C1.getNumSignBits() + 1 :
2172 : C1.getActiveBits();
2173 :
2174 : // Make sure we're not losing bits from the constant.
2175 63138 : if (MinBits > 0 &&
2176 63138 : MinBits < C1.getBitWidth() &&
2177 : MinBits >= ReqdBits) {
2178 4999 : EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
2179 4999 : if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
2180 : // Will get folded away.
2181 216 : SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
2182 216 : if (MinBits == 1 && C1 == 1)
2183 : // Invert the condition.
2184 : return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
2185 0 : Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2186 216 : SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
2187 216 : return DAG.getSetCC(dl, VT, Trunc, C, Cond);
2188 : }
2189 :
2190 : // If truncating the setcc operands is not desirable, we can still
2191 : // simplify the expression in some cases:
2192 : // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
2193 : // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
2194 : // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
2195 : // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
2196 : // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
2197 : // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
2198 4783 : SDValue TopSetCC = N0->getOperand(0);
2199 4783 : unsigned N0Opc = N0->getOpcode();
2200 4783 : bool SExt = (N0Opc == ISD::SIGN_EXTEND);
2201 5166 : if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
2202 205 : TopSetCC.getOpcode() == ISD::SETCC &&
2203 410 : (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
2204 213 : (isConstFalseVal(N1C) ||
2205 16 : isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
2206 :
2207 404 : bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
2208 5 : (!N1C->isNullValue() && Cond == ISD::SETNE);
2209 :
2210 : if (!Inverse)
2211 99 : return TopSetCC;
2212 :
2213 103 : ISD::CondCode InvCond = ISD::getSetCCInverse(
2214 : cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
2215 206 : TopSetCC.getOperand(0).getValueType().isInteger());
2216 : return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
2217 : TopSetCC.getOperand(1),
2218 103 : InvCond);
2219 : }
2220 : }
2221 : }
2222 :
2223 : // If the LHS is '(and load, const)', the RHS is 0, the test is for
2224 : // equality or unsigned, and all 1 bits of the const are in the same
2225 : // partial word, see if we can shorten the load.
2226 130397 : if (DCI.isBeforeLegalize() &&
2227 122949 : !ISD::isSignedIntSetCC(Cond) &&
2228 122949 : N0.getOpcode() == ISD::AND && C1 == 0 &&
2229 6697 : N0.getNode()->hasOneUse() &&
2230 : isa<LoadSDNode>(N0.getOperand(0)) &&
2231 238525 : N0.getOperand(0).getNode()->hasOneUse() &&
2232 : isa<ConstantSDNode>(N0.getOperand(1))) {
2233 : LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
2234 : APInt bestMask;
2235 : unsigned bestWidth = 0, bestOffset = 0;
2236 1938 : if (!Lod->isVolatile() && Lod->isUnindexed()) {
2237 1938 : unsigned origWidth = N0.getValueSizeInBits();
2238 : unsigned maskWidth = origWidth;
2239 : // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
2240 : // 8 bits, but have to be careful...
2241 1938 : if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
2242 0 : origWidth = Lod->getMemoryVT().getSizeInBits();
2243 : const APInt &Mask =
2244 3876 : cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
2245 2566 : for (unsigned width = origWidth / 2; width>=8; width /= 2) {
2246 628 : APInt newMask = APInt::getLowBitsSet(maskWidth, width);
2247 803 : for (unsigned offset=0; offset<origWidth/width; offset++) {
2248 790 : if (Mask.isSubsetOf(newMask)) {
2249 615 : if (DAG.getDataLayout().isLittleEndian())
2250 597 : bestOffset = (uint64_t)offset * (width/8);
2251 : else
2252 18 : bestOffset = (origWidth/width - offset - 1) * (width/8);
2253 615 : bestMask = Mask.lshr(offset * (width/8) * 8);
2254 : bestWidth = width;
2255 615 : break;
2256 : }
2257 175 : newMask <<= width;
2258 : }
2259 : }
2260 : }
2261 1938 : if (bestWidth) {
2262 334 : EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
2263 : if (newVT.isRound()) {
2264 333 : EVT PtrType = Lod->getOperand(1).getValueType();
2265 333 : SDValue Ptr = Lod->getBasePtr();
2266 333 : if (bestOffset != 0)
2267 89 : Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
2268 89 : DAG.getConstant(bestOffset, dl, PtrType));
2269 333 : unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
2270 : SDValue NewLoad = DAG.getLoad(
2271 : newVT, dl, Lod->getChain(), Ptr,
2272 666 : Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
2273 : return DAG.getSetCC(dl, VT,
2274 : DAG.getNode(ISD::AND, dl, newVT, NewLoad,
2275 333 : DAG.getConstant(bestMask.trunc(bestWidth),
2276 : dl, newVT)),
2277 999 : DAG.getConstant(0LL, dl, newVT), Cond);
2278 : }
2279 : }
2280 : }
2281 :
2282 : // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
2283 476384 : if (N0.getOpcode() == ISD::ZERO_EXTEND) {
2284 755 : unsigned InSize = N0.getOperand(0).getValueSizeInBits();
2285 :
2286 : // If the comparison constant has bits in the upper part, the
2287 : // zero-extended value could never match.
2288 755 : if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
2289 755 : C1.getBitWidth() - InSize))) {
2290 27 : switch (Cond) {
2291 9 : case ISD::SETUGT:
2292 : case ISD::SETUGE:
2293 : case ISD::SETEQ:
2294 9 : return DAG.getConstant(0, dl, VT);
2295 12 : case ISD::SETULT:
2296 : case ISD::SETULE:
2297 : case ISD::SETNE:
2298 12 : return DAG.getConstant(1, dl, VT);
2299 1 : case ISD::SETGT:
2300 : case ISD::SETGE:
2301 : // True if the sign bit of C1 is set.
2302 1 : return DAG.getConstant(C1.isNegative(), dl, VT);
2303 : case ISD::SETLT:
2304 : case ISD::SETLE:
2305 : // True if the sign bit of C1 isn't set.
2306 5 : return DAG.getConstant(C1.isNonNegative(), dl, VT);
2307 : default:
2308 : break;
2309 : }
2310 : }
2311 :
2312 : // Otherwise, we can perform the comparison with the low bits.
2313 : switch (Cond) {
2314 686 : case ISD::SETEQ:
2315 : case ISD::SETNE:
2316 : case ISD::SETUGT:
2317 : case ISD::SETUGE:
2318 : case ISD::SETULT:
2319 : case ISD::SETULE: {
2320 1372 : EVT newVT = N0.getOperand(0).getValueType();
2321 1372 : if (DCI.isBeforeLegalizeOps() ||
2322 30 : (isOperationLegal(ISD::SETCC, newVT) &&
2323 : isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
2324 : EVT NewSetCCVT =
2325 668 : getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
2326 668 : SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
2327 :
2328 668 : SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
2329 668 : NewConst, Cond);
2330 1336 : return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
2331 : }
2332 18 : break;
2333 : }
2334 : default:
2335 : break; // todo, be more careful with signed comparisons
2336 : }
2337 237437 : } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2338 : (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
2339 46 : EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
2340 46 : unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
2341 46 : EVT ExtDstTy = N0.getValueType();
2342 46 : unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
2343 :
2344 : // If the constant doesn't fit into the number of bits for the source of
2345 : // the sign extension, it is impossible for both sides to be equal.
2346 46 : if (C1.getMinSignedBits() > ExtSrcTyBits)
2347 2 : return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
2348 :
2349 44 : SDValue ZextOp;
2350 88 : EVT Op0Ty = N0.getOperand(0).getValueType();
2351 0 : if (Op0Ty == ExtSrcTy) {
2352 0 : ZextOp = N0.getOperand(0);
2353 : } else {
2354 44 : APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
2355 44 : ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
2356 44 : DAG.getConstant(Imm, dl, Op0Ty));
2357 : }
2358 44 : if (!DCI.isCalledByLegalizer())
2359 44 : DCI.AddToWorklist(ZextOp.getNode());
2360 : // Otherwise, make this a use of a zext.
2361 : return DAG.getSetCC(dl, VT, ZextOp,
2362 88 : DAG.getConstant(C1 & APInt::getLowBitsSet(
2363 : ExtDstTyBits,
2364 : ExtSrcTyBits),
2365 : dl, ExtDstTy),
2366 44 : Cond);
2367 571975 : } else if ((N1C->isNullValue() || N1C->isOne()) &&
2368 : (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
2369 : // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
2370 : if (N0.getOpcode() == ISD::SETCC &&
2371 149115 : isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
2372 181 : bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
2373 181 : if (TrueWhenTrue)
2374 98 : return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
2375 : // Invert the condition.
2376 83 : ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
2377 83 : CC = ISD::getSetCCInverse(CC,
2378 166 : N0.getOperand(0).getValueType().isInteger());
2379 166 : if (DCI.isBeforeLegalizeOps() ||
2380 20 : isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
2381 166 : return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
2382 : }
2383 :
2384 148554 : if ((N0.getOpcode() == ISD::XOR ||
2385 14025 : (N0.getOpcode() == ISD::AND &&
2386 14025 : N0.getOperand(0).getOpcode() == ISD::XOR &&
2387 348 : N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
2388 148659 : isa<ConstantSDNode>(N0.getOperand(1)) &&
2389 105 : cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
2390 : // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
2391 : // can only do this if the top bits are known zero.
2392 26 : unsigned BitWidth = N0.getValueSizeInBits();
2393 26 : if (DAG.MaskedValueIsZero(N0,
2394 52 : APInt::getHighBitsSet(BitWidth,
2395 : BitWidth-1))) {
2396 : // Okay, get the un-inverted input value.
2397 18 : SDValue Val;
2398 36 : if (N0.getOpcode() == ISD::XOR) {
2399 18 : Val = N0.getOperand(0);
2400 : } else {
2401 : assert(N0.getOpcode() == ISD::AND &&
2402 : N0.getOperand(0).getOpcode() == ISD::XOR);
2403 : // ((X^1)&1)^1 -> X & 1
2404 0 : Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
2405 0 : N0.getOperand(0).getOperand(0),
2406 0 : N0.getOperand(1));
2407 : }
2408 :
2409 : return DAG.getSetCC(dl, VT, Val, N1,
2410 23 : Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2411 : }
2412 297056 : } else if (N1C->isOne() &&
2413 4098 : (VT == MVT::i1 ||
2414 8196 : getBooleanContents(N0->getValueType(0)) ==
2415 : ZeroOrOneBooleanContent)) {
2416 14998 : SDValue Op0 = N0;
2417 14998 : if (Op0.getOpcode() == ISD::TRUNCATE)
2418 6724 : Op0 = Op0.getOperand(0);
2419 :
2420 16 : if ((Op0.getOpcode() == ISD::XOR) &&
2421 14998 : Op0.getOperand(0).getOpcode() == ISD::SETCC &&
2422 4 : Op0.getOperand(1).getOpcode() == ISD::SETCC) {
2423 : // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
2424 4 : Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
2425 : return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
2426 6757 : Cond);
2427 : }
2428 : if (Op0.getOpcode() == ISD::AND &&
2429 15479 : isa<ConstantSDNode>(Op0.getOperand(1)) &&
2430 485 : cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
2431 : // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
2432 406 : if (Op0.getValueType().bitsGT(VT))
2433 54 : Op0 = DAG.getNode(ISD::AND, dl, VT,
2434 : DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
2435 54 : DAG.getConstant(1, dl, VT));
2436 352 : else if (Op0.getValueType().bitsLT(VT))
2437 1 : Op0 = DAG.getNode(ISD::AND, dl, VT,
2438 : DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
2439 1 : DAG.getConstant(1, dl, VT));
2440 :
2441 : return DAG.getSetCC(dl, VT, Op0,
2442 : DAG.getConstant(0, dl, Op0.getValueType()),
2443 796 : Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2444 : }
2445 14588 : if (Op0.getOpcode() == ISD::AssertZext &&
2446 14588 : cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
2447 : return DAG.getSetCC(dl, VT, Op0,
2448 : DAG.getConstant(0, dl, Op0.getValueType()),
2449 12693 : Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2450 : }
2451 : }
2452 :
2453 230495 : if (SDValue V =
2454 230495 : optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
2455 172 : return V;
2456 : }
2457 :
2458 : // These simplifications apply to splat vectors as well.
2459 : // TODO: Handle more splat vector cases.
2460 388026 : if (auto *N1C = isConstOrConstSplat(N1)) {
2461 238483 : const APInt &C1 = N1C->getAPIntValue();
2462 :
2463 : APInt MinVal, MaxVal;
2464 476966 : unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
2465 238483 : if (ISD::isSignedIntSetCC(Cond)) {
2466 16827 : MinVal = APInt::getSignedMinValue(OperandBitSize);
2467 33654 : MaxVal = APInt::getSignedMaxValue(OperandBitSize);
2468 : } else {
2469 443312 : MinVal = APInt::getMinValue(OperandBitSize);
2470 221656 : MaxVal = APInt::getMaxValue(OperandBitSize);
2471 : }
2472 :
2473 : // Canonicalize GE/LE comparisons to use GT/LT comparisons.
2474 238483 : if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
2475 : // X >= MIN --> true
2476 2277 : if (C1 == MinVal)
2477 55 : return DAG.getBoolConstant(true, dl, VT, OpVT);
2478 :
2479 2222 : if (!VT.isVector()) { // TODO: Support this for vectors.
2480 : // X >= C0 --> X > (C0 - 1)
2481 2175 : APInt C = C1 - 1;
2482 2175 : ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
2483 2175 : if ((DCI.isBeforeLegalizeOps() ||
2484 2175 : isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
2485 2188 : (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
2486 26 : isLegalICmpImmediate(C.getSExtValue())))) {
2487 : return DAG.getSetCC(dl, VT, N0,
2488 : DAG.getConstant(C, dl, N1.getValueType()),
2489 2162 : NewCC);
2490 : }
2491 : }
2492 : }
2493 :
2494 236266 : if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
2495 : // X <= MAX --> true
2496 5997 : if (C1 == MaxVal)
2497 7 : return DAG.getBoolConstant(true, dl, VT, OpVT);
2498 :
2499 : // X <= C0 --> X < (C0 + 1)
2500 5990 : if (!VT.isVector()) { // TODO: Support this for vectors.
2501 5913 : APInt C = C1 + 1;
2502 5913 : ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
2503 5913 : if ((DCI.isBeforeLegalizeOps() ||
2504 5913 : isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
2505 5973 : (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
2506 120 : isLegalICmpImmediate(C.getSExtValue())))) {
2507 : return DAG.getSetCC(dl, VT, N0,
2508 : DAG.getConstant(C, dl, N1.getValueType()),
2509 5853 : NewCC);
2510 : }
2511 : }
2512 : }
2513 :
2514 230406 : if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
2515 28307 : if (C1 == MinVal)
2516 77 : return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
2517 :
2518 : // TODO: Support this for vectors after legalize ops.
2519 28230 : if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
2520 : // Canonicalize setlt X, Max --> setne X, Max
2521 28210 : if (C1 == MaxVal)
2522 24 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
2523 :
2524 : // If we have setult X, 1, turn it into seteq X, 0
2525 28186 : if (C1 == MinVal+1)
2526 : return DAG.getSetCC(dl, VT, N0,
2527 : DAG.getConstant(MinVal, dl, N0.getValueType()),
2528 480 : ISD::SETEQ);
2529 : }
2530 : }
2531 :
2532 230065 : if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
2533 18080 : if (C1 == MaxVal)
2534 13 : return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
2535 :
2536 : // TODO: Support this for vectors after legalize ops.
2537 18067 : if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
2538 : // Canonicalize setgt X, Min --> setne X, Min
2539 18046 : if (C1 == MinVal)
2540 147 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
2541 :
2542 : // If we have setugt X, Max-1, turn it into seteq X, Max
2543 17899 : if (C1 == MaxVal-1)
2544 : return DAG.getSetCC(dl, VT, N0,
2545 : DAG.getConstant(MaxVal, dl, N0.getValueType()),
2546 10 : ISD::SETEQ);
2547 : }
2548 : }
2549 :
2550 : // If we have "setcc X, C0", check to see if we can shrink the immediate
2551 : // by changing cc.
2552 : // TODO: Support this for vectors after legalize ops.
2553 229900 : if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
2554 : // SETUGT X, SINTMAX -> SETLT X, 0
2555 238849 : if (Cond == ISD::SETUGT &&
2556 250319 : C1 == APInt::getSignedMaxValue(OperandBitSize))
2557 : return DAG.getSetCC(dl, VT, N0,
2558 : DAG.getConstant(0, dl, N1.getValueType()),
2559 6 : ISD::SETLT);
2560 :
2561 : // SETULT X, SINTMIN -> SETGT X, -1
2562 247394 : if (Cond == ISD::SETULT &&
2563 267337 : C1 == APInt::getSignedMinValue(OperandBitSize)) {
2564 : SDValue ConstMinusOne =
2565 84 : DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
2566 84 : N1.getValueType());
2567 84 : return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
2568 : }
2569 : }
2570 : }
2571 :
2572 : // Back to non-vector simplifications.
2573 : // TODO: Can we do these for vector splats?
2574 : if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
2575 221692 : const APInt &C1 = N1C->getAPIntValue();
2576 :
2577 : // Fold bit comparisons when we can.
2578 0 : if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2579 176519 : (VT == N0.getValueType() ||
2580 322804 : (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
2581 : N0.getOpcode() == ISD::AND) {
2582 9811 : auto &DL = DAG.getDataLayout();
2583 9811 : if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2584 : EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
2585 15698 : !DCI.isBeforeLegalize());
2586 7849 : if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
2587 : // Perform the xform if the AND RHS is a single bit.
2588 2704 : if (AndRHS->getAPIntValue().isPowerOf2()) {
2589 : return DAG.getNode(ISD::TRUNCATE, dl, VT,
2590 : DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
2591 : DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
2592 961 : ShiftTy)));
2593 : }
2594 12847 : } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
2595 : // (X & 8) == 8 --> (X & 8) >> 3
2596 : // Perform the xform if C1 is a single bit.
2597 55 : if (C1.isPowerOf2()) {
2598 : return DAG.getNode(ISD::TRUNCATE, dl, VT,
2599 : DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
2600 : DAG.getConstant(C1.logBase2(), dl,
2601 0 : ShiftTy)));
2602 : }
2603 : }
2604 : }
2605 : }
2606 :
2607 441425 : if (C1.getMinSignedBits() <= 64 &&
2608 441388 : !isLegalICmpImmediate(C1.getSExtValue())) {
2609 : // (X & -256) == 256 -> (X >> 8) == 1
2610 290 : if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2611 1103 : N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
2612 17 : if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2613 17 : const APInt &AndRHSC = AndRHS->getAPIntValue();
2614 53 : if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
2615 12 : unsigned ShiftBits = AndRHSC.countTrailingZeros();
2616 12 : auto &DL = DAG.getDataLayout();
2617 : EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
2618 24 : !DCI.isBeforeLegalize());
2619 24 : EVT CmpTy = N0.getValueType();
2620 12 : SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
2621 : DAG.getConstant(ShiftBits, dl,
2622 12 : ShiftTy));
2623 12 : SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
2624 12 : return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
2625 : }
2626 : }
2627 1086 : } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
2628 1086 : Cond == ISD::SETULE || Cond == ISD::SETUGT) {
2629 342 : bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
2630 : // X < 0x100000000 -> (X >> 32) < 1
2631 : // X >= 0x100000000 -> (X >> 32) >= 1
2632 : // X <= 0x0ffffffff -> (X >> 32) < 1
2633 : // X > 0x0ffffffff -> (X >> 32) >= 1
2634 : unsigned ShiftBits;
2635 : APInt NewC = C1;
2636 : ISD::CondCode NewCond = Cond;
2637 342 : if (AdjOne) {
2638 : ShiftBits = C1.countTrailingOnes();
2639 194 : NewC = NewC + 1;
2640 194 : NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2641 : } else {
2642 148 : ShiftBits = C1.countTrailingZeros();
2643 : }
2644 : NewC.lshrInPlace(ShiftBits);
2645 528 : if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
2646 372 : isLegalICmpImmediate(NewC.getSExtValue())) {
2647 142 : auto &DL = DAG.getDataLayout();
2648 : EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
2649 284 : !DCI.isBeforeLegalize());
2650 284 : EVT CmpTy = N0.getValueType();
2651 : SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
2652 142 : DAG.getConstant(ShiftBits, dl, ShiftTy));
2653 142 : SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
2654 142 : return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
2655 : }
2656 : }
2657 : }
2658 : }
2659 :
2660 378238 : if (isa<ConstantFPSDNode>(N0.getNode())) {
2661 : // Constant fold or commute setcc.
2662 143 : SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
2663 143 : if (O.getNode()) return O;
2664 : } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
2665 : // If the RHS of an FP comparison is a constant, simplify it away in
2666 : // some cases.
2667 14062 : if (CFP->getValueAPF().isNaN()) {
2668 : // If an operand is known to be a nan, we can fold it.
2669 1 : switch (ISD::getUnorderedFlavor(Cond)) {
2670 0 : default: llvm_unreachable("Unknown flavor!");
2671 0 : case 0: // Known false.
2672 0 : return DAG.getBoolConstant(false, dl, VT, OpVT);
2673 1 : case 1: // Known true.
2674 1 : return DAG.getBoolConstant(true, dl, VT, OpVT);
2675 0 : case 2: // Undefined.
2676 0 : return DAG.getUNDEF(VT);
2677 : }
2678 : }
2679 :
2680 : // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
2681 : // constant if knowing that the operand is non-nan is enough. We prefer to
2682 : // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
2683 : // materialize 0.0.
2684 7030 : if (Cond == ISD::SETO || Cond == ISD::SETUO)
2685 107 : return DAG.getSetCC(dl, VT, N0, N0, Cond);
2686 :
2687 : // setcc (fneg x), C -> setcc swap(pred) x, -C
2688 6923 : if (N0.getOpcode() == ISD::FNEG) {
2689 61 : ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
2690 122 : if (DCI.isBeforeLegalizeOps() ||
2691 : isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
2692 122 : SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
2693 122 : return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
2694 : }
2695 : }
2696 :
2697 : // If the condition is not legal, see if we can find an equivalent one
2698 : // which is legal.
2699 6862 : if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
2700 : // If the comparison was an awkward floating-point == or != and one of
2701 : // the comparison operands is infinity or negative infinity, convert the
2702 : // condition to a less-awkward <= or >=.
2703 1828 : if (CFP->getValueAPF().isInfinity()) {
2704 258 : if (CFP->getValueAPF().isNegative()) {
2705 7 : if (Cond == ISD::SETOEQ &&
2706 : isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
2707 2 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
2708 5 : if (Cond == ISD::SETUEQ &&
2709 : isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
2710 0 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
2711 5 : if (Cond == ISD::SETUNE &&
2712 : isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
2713 5 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
2714 0 : if (Cond == ISD::SETONE &&
2715 : isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
2716 0 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
2717 : } else {
2718 251 : if (Cond == ISD::SETOEQ &&
2719 : isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
2720 135 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
2721 116 : if (Cond == ISD::SETUEQ &&
2722 : isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
2723 0 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
2724 116 : if (Cond == ISD::SETUNE &&
2725 : isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
2726 116 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
2727 0 : if (Cond == ISD::SETONE &&
2728 : isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
2729 0 : return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
2730 : }
2731 : }
2732 : }
2733 : }
2734 :
2735 377810 : if (N0 == N1) {
2736 : // The sext(setcc()) => setcc() optimization relies on the appropriate
2737 : // constant being emitted.
2738 :
2739 : bool EqTrue = ISD::isTrueWhenEqual(Cond);
2740 :
2741 : // We can always fold X == X for integer setcc's.
2742 1044 : if (N0.getValueType().isInteger())
2743 109 : return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
2744 :
2745 : unsigned UOF = ISD::getUnorderedFlavor(Cond);
2746 935 : if (UOF == 2) // FP operators that are undefined on NaNs.
2747 0 : return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
2748 935 : if (UOF == unsigned(EqTrue))
2749 19 : return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
2750 : // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
2751 : // if it is not already.
2752 916 : ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
2753 916 : if (NewCond != Cond &&
2754 224 : (DCI.isBeforeLegalizeOps() ||
2755 : isCondCodeLegal(NewCond, N0.getSimpleValueType())))
2756 46 : return DAG.getSetCC(dl, VT, N0, N1, NewCond);
2757 : }
2758 :
2759 377636 : if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2760 639490 : N0.getValueType().isInteger()) {
2761 261465 : if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
2762 : N0.getOpcode() == ISD::XOR) {
2763 : // Simplify (X+Y) == (X+Z) --> Y == Z
2764 6357 : if (N0.getOpcode() == N1.getOpcode()) {
2765 26 : if (N0.getOperand(0) == N1.getOperand(0))
2766 4 : return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
2767 22 : if (N0.getOperand(1) == N1.getOperand(1))
2768 0 : return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
2769 22 : if (isCommutativeBinOp(N0.getOpcode())) {
2770 : // If X op Y == Y op X, try other combinations.
2771 32 : if (N0.getOperand(0) == N1.getOperand(1))
2772 : return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
2773 0 : Cond);
2774 16 : if (N0.getOperand(1) == N1.getOperand(0))
2775 : return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
2776 0 : Cond);
2777 : }
2778 : }
2779 :
2780 : // If RHS is a legal immediate value for a compare instruction, we need
2781 : // to be careful about increasing register pressure needlessly.
2782 : bool LegalRHSImm = false;
2783 :
2784 : if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
2785 4810 : if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2786 : // Turn (X+C1) == C2 --> X == C2-C1
2787 3215 : if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
2788 53 : return DAG.getSetCC(dl, VT, N0.getOperand(0),
2789 159 : DAG.getConstant(RHSC->getAPIntValue()-
2790 : LHSR->getAPIntValue(),
2791 106 : dl, N0.getValueType()), Cond);
2792 : }
2793 :
2794 : // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
2795 3162 : if (N0.getOpcode() == ISD::XOR)
2796 : // If we know that all of the inverted bits are zero, don't bother
2797 : // performing the inversion.
2798 356 : if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
2799 : return
2800 18 : DAG.getSetCC(dl, VT, N0.getOperand(0),
2801 54 : DAG.getConstant(LHSR->getAPIntValue() ^
2802 : RHSC->getAPIntValue(),
2803 : dl, N0.getValueType()),
2804 36 : Cond);
2805 : }
2806 :
2807 : // Turn (C1-X) == C2 --> X == C1-C2
2808 4739 : if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
2809 59 : if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
2810 : return
2811 3 : DAG.getSetCC(dl, VT, N0.getOperand(1),
2812 9 : DAG.getConstant(SUBC->getAPIntValue() -
2813 : RHSC->getAPIntValue(),
2814 : dl, N0.getValueType()),
2815 6 : Cond);
2816 : }
2817 : }
2818 :
2819 : // Could RHSC fold directly into a compare?
2820 9472 : if (RHSC->getValueType(0).getSizeInBits() <= 64)
2821 9472 : LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
2822 : }
2823 :
2824 : // Simplify (X+Z) == X --> Z == 0
2825 : // Don't do this if X is an immediate that can fold into a cmp
2826 : // instruction and X+Z has other uses. It could be an induction variable
2827 : // chain, and the transform would increase register pressure.
2828 4736 : if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
2829 4360 : if (N0.getOperand(0) == N1)
2830 32 : return DAG.getSetCC(dl, VT, N0.getOperand(1),
2831 64 : DAG.getConstant(0, dl, N0.getValueType()), Cond);
2832 2148 : if (N0.getOperand(1) == N1) {
2833 28 : if (isCommutativeBinOp(N0.getOpcode()))
2834 2 : return DAG.getSetCC(dl, VT, N0.getOperand(0),
2835 : DAG.getConstant(0, dl, N0.getValueType()),
2836 4 : Cond);
2837 12 : if (N0.getNode()->hasOneUse()) {
2838 : assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
2839 2 : auto &DL = DAG.getDataLayout();
2840 : // (Z-X) == X --> Z == X<<1
2841 : SDValue SH = DAG.getNode(
2842 : ISD::SHL, dl, N1.getValueType(), N1,
2843 : DAG.getConstant(1, dl,
2844 : getShiftAmountTy(N1.getValueType(), DL,
2845 4 : !DCI.isBeforeLegalize())));
2846 2 : if (!DCI.isCalledByLegalizer())
2847 2 : DCI.AddToWorklist(SH.getNode());
2848 4 : return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
2849 : }
2850 : }
2851 : }
2852 : }
2853 :
2854 261351 : if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
2855 : N1.getOpcode() == ISD::XOR) {
2856 : // Simplify X == (X+Z) --> Z == 0
2857 3932 : if (N1.getOperand(0) == N0)
2858 : return DAG.getSetCC(dl, VT, N1.getOperand(1),
2859 1 : DAG.getConstant(0, dl, N1.getValueType()), Cond);
2860 3931 : if (N1.getOperand(1) == N0) {
2861 15 : if (isCommutativeBinOp(N1.getOpcode()))
2862 : return DAG.getSetCC(dl, VT, N1.getOperand(0),
2863 0 : DAG.getConstant(0, dl, N1.getValueType()), Cond);
2864 : if (N1.getNode()->hasOneUse()) {
2865 : assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
2866 3 : auto &DL = DAG.getDataLayout();
2867 : // X == (Z-X) --> X<<1 == Z
2868 : SDValue SH = DAG.getNode(
2869 : ISD::SHL, dl, N1.getValueType(), N0,
2870 : DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL,
2871 6 : !DCI.isBeforeLegalize())));
2872 3 : if (!DCI.isCalledByLegalizer())
2873 3 : DCI.AddToWorklist(SH.getNode());
2874 3 : return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
2875 : }
2876 : }
2877 : }
2878 :
2879 261347 : if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl))
2880 268 : return V;
2881 : }
2882 :
2883 : // Fold away ALL boolean setcc's.
2884 377250 : SDValue Temp;
2885 757006 : if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
2886 133 : EVT OpVT = N0.getValueType();
2887 133 : switch (Cond) {
2888 0 : default: llvm_unreachable("Unknown integer setcc!");
2889 : case ISD::SETEQ: // X == Y -> ~(X^Y)
2890 44 : Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
2891 44 : N0 = DAG.getNOT(dl, Temp, OpVT);
2892 44 : if (!DCI.isCalledByLegalizer())
2893 44 : DCI.AddToWorklist(Temp.getNode());
2894 : break;
2895 : case ISD::SETNE: // X != Y --> (X^Y)
2896 38 : N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
2897 38 : break;
2898 15 : case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
2899 : case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
2900 15 : Temp = DAG.getNOT(dl, N0, OpVT);
2901 15 : N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
2902 15 : if (!DCI.isCalledByLegalizer())
2903 15 : DCI.AddToWorklist(Temp.getNode());
2904 : break;
2905 18 : case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
2906 : case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
2907 18 : Temp = DAG.getNOT(dl, N1, OpVT);
2908 18 : N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
2909 18 : if (!DCI.isCalledByLegalizer())
2910 18 : DCI.AddToWorklist(Temp.getNode());
2911 : break;
2912 9 : case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
2913 : case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
2914 9 : Temp = DAG.getNOT(dl, N0, OpVT);
2915 9 : N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
2916 9 : if (!DCI.isCalledByLegalizer())
2917 9 : DCI.AddToWorklist(Temp.getNode());
2918 : break;
2919 9 : case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
2920 : case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
2921 9 : Temp = DAG.getNOT(dl, N1, OpVT);
2922 9 : N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
2923 9 : break;
2924 : }
2925 133 : if (VT.getScalarType() != MVT::i1) {
2926 6 : if (!DCI.isCalledByLegalizer())
2927 6 : DCI.AddToWorklist(N0.getNode());
2928 : // FIXME: If running after legalize, we probably can't do this.
2929 6 : ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
2930 6 : N0 = DAG.getNode(ExtendCode, dl, VT, N0);
2931 : }
2932 133 : return N0;
2933 : }
2934 :
2935 : // Could not fold it.
2936 377117 : return SDValue();
2937 : }
2938 :
2939 : /// Returns true (and the GlobalValue and the offset) if the node is a
2940 : /// GlobalAddress + offset.
2941 22333204 : bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
2942 : int64_t &Offset) const {
2943 : if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
2944 2814866 : GA = GASD->getGlobal();
2945 2814866 : Offset += GASD->getOffset();
2946 2814866 : return true;
2947 : }
2948 :
2949 19518338 : if (N->getOpcode() == ISD::ADD) {
2950 8583611 : SDValue N1 = N->getOperand(0);
2951 8583611 : SDValue N2 = N->getOperand(1);
2952 8583611 : if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
2953 : if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
2954 1933819 : Offset += V->getSExtValue();
2955 1933819 : return true;
2956 : }
2957 6639692 : } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
2958 : if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
2959 0 : Offset += V->getSExtValue();
2960 0 : return true;
2961 : }
2962 : }
2963 : }
2964 :
2965 : return false;
2966 : }
2967 :
/// Hook invoked by the DAG combiner for target-registered node types;
/// targets override this to fold their custom ISD nodes. Returning an
/// empty SDValue tells the combiner no transformation was performed.
SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
2973 :
2974 : //===----------------------------------------------------------------------===//
2975 : // Inline Assembler Implementation Methods
2976 : //===----------------------------------------------------------------------===//
2977 :
2978 : TargetLowering::ConstraintType
2979 478537 : TargetLowering::getConstraintType(StringRef Constraint) const {
2980 478537 : unsigned S = Constraint.size();
2981 :
2982 478537 : if (S == 1) {
2983 30682 : switch (Constraint[0]) {
2984 : default: break;
2985 : case 'r': return C_RegisterClass;
2986 : case 'm': // memory
2987 : case 'o': // offsetable
2988 : case 'V': // not offsetable
2989 : return C_Memory;
2990 : case 'i': // Simple Integer or Relocatable Constant
2991 : case 'n': // Simple Integer
2992 : case 'E': // Floating Point Constant
2993 : case 'F': // Floating Point Constant
2994 : case 's': // Relocatable Constant
2995 : case 'p': // Address.
2996 : case 'X': // Allow ANY value.
2997 : case 'I': // Target registers.
2998 : case 'J':
2999 : case 'K':
3000 : case 'L':
3001 : case 'M':
3002 : case 'N':
3003 : case 'O':
3004 : case 'P':
3005 : case '<':
3006 : case '>':
3007 : return C_Other;
3008 : }
3009 : }
3010 :
3011 449135 : if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
3012 450770 : if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
3013 : return C_Memory;
3014 444657 : return C_Register;
3015 : }
3016 : return C_Unknown;
3017 : }
3018 :
3019 : /// Try to replace an X constraint, which matches anything, with another that
3020 : /// has more specific requirements based on the type of the corresponding
3021 : /// operand.
3022 123 : const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
3023 123 : if (ConstraintVT.isInteger())
3024 : return "r";
3025 39 : if (ConstraintVT.isFloatingPoint())
3026 36 : return "f"; // works for many targets
3027 : return nullptr;
3028 : }
3029 :
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled here; targets deal with
  // longer (e.g. register-name) constraints themselves.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock) {
      Ops.push_back(Op);
      return;
    }
    // Not a label: treat 'X' like the immediate/relocatable letters below.
    LLVM_FALLTHROUGH;
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant
    // These operands are interested in values of the form (GV+C), where C may
    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
    // is possible and fine if either GV or C are missing.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);

    // If we have "(add GV, C)", pull out GV/C
    if (Op.getOpcode() == ISD::ADD) {
      // Try (GV + C) first, then the commuted (C + GV) order.
      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (!C || !GA) {
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
      }
      // Require BOTH pieces; a partial match means the ADD is not a
      // GV+offset form and neither piece may be used below.
      if (!C || !GA) {
        C = nullptr;
        GA = nullptr;
      }
    }

    // If we find a valid operand, map to the TargetXXX version so that the
    // value itself doesn't get selected.
    if (GA) {   // Either &GV or &GV+C
      // 'n' demands a plain integer, so a global address is rejected
      // (falls through to return without pushing anything).
      if (ConstraintLetter != 'n') {
        int64_t Offs = GA->getOffset();
        if (C) Offs += C->getZExtValue();
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
                                                 C ? SDLoc(C) : SDLoc(),
                                                 Op.getValueType(), Offs));
      }
      return;
    }
    if (C) {   // just C, no GV.
      // Simple constants are not allowed for 's'.
      if (ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
                                            SDLoc(C), MVT::i64));
      }
      return;
    }
    break;
  }
  }
}
3098 :
3099 : std::pair<unsigned, const TargetRegisterClass *>
3100 103952 : TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
3101 : StringRef Constraint,
3102 : MVT VT) const {
3103 103952 : if (Constraint.empty() || Constraint[0] != '{')
3104 141 : return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
3105 : assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
3106 :
3107 : // Remove the braces from around the name.
3108 103811 : StringRef RegName(Constraint.data()+1, Constraint.size()-2);
3109 :
3110 : std::pair<unsigned, const TargetRegisterClass*> R =
3111 : std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
3112 :
3113 : // Figure out which register class contains this reg.
3114 10793168 : for (const TargetRegisterClass *RC : RI->regclasses()) {
3115 : // If none of the value types for this register class are valid, we
3116 : // can't use it. For example, 64-bit reg classes on 32-bit targets.
3117 10690261 : if (!isLegalRC(*RI, *RC))
3118 : continue;
3119 :
3120 82981260 : for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
3121 82981260 : I != E; ++I) {
3122 148557767 : if (RegName.equals_lower(RI->getRegAsmName(*I))) {
3123 : std::pair<unsigned, const TargetRegisterClass*> S =
3124 513957 : std::make_pair(*I, RC);
3125 :
3126 : // If this register class has the requested value type, return it,
3127 : // otherwise keep searching and return the first class found
3128 : // if no other is found which explicitly has the requested type.
3129 513957 : if (RI->isTypeLegalForClass(*RC, VT))
3130 904 : return S;
3131 513053 : if (!R.second)
3132 : R = S;
3133 : }
3134 : }
3135 : }
3136 :
3137 102907 : return R;
3138 : }
3139 :
3140 : //===----------------------------------------------------------------------===//
3141 : // Constraint Selection.
3142 :
3143 : /// Return true of this is an input operand that is a matching constraint like
3144 : /// "4".
3145 211041 : bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
3146 : assert(!ConstraintCode.empty() && "No known constraint!");
3147 211041 : return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
3148 : }
3149 :
3150 : /// If this is an input matching constraint, this method returns the output
3151 : /// operand it matches.
3152 1485 : unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
3153 : assert(!ConstraintCode.empty() && "No known constraint!");
3154 1485 : return atoi(ConstraintCode.c_str());
3155 : }
3156 :
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 ImmutableCallSite CS) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!CS.getType()->isVoidTy() &&
             "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
        // Multiple outputs come back as a struct; this result's type is the
        // ResNo'th element.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands are passed by pointer; the constraint actually
        // describes the pointee type.
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // Look for vector wrapped in a struct.  e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        // Only power-of-two sizes up to 128 bits are tiled; others keep
        // ConstraintVT == MVT::Other.
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        // Model pointers as integers of pointer width for their address
        // space.
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            // One operand cannot satisfy this alternative at all.
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
           cIndex != eIndex; ++cIndex) {
        AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        // Tied operands of different VTs are only acceptable when they land
        // in the same register class and agree on integer-ness.
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
3346 :
3347 : /// Return an integer indicating how general CT is.
3348 : static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
3349 : switch (CT) {
3350 : case TargetLowering::C_Other:
3351 : case TargetLowering::C_Unknown:
3352 : return 0;
3353 : case TargetLowering::C_Register:
3354 : return 1;
3355 : case TargetLowering::C_RegisterClass:
3356 : return 2;
3357 : case TargetLowering::C_Memory:
3358 : return 3;
3359 : }
3360 0 : llvm_unreachable("Invalid constraint type");
3361 : }
3362 :
3363 : /// Examine constraint type and operand type and determine a weight value.
3364 : /// This object must already have been set up with the operand type
3365 : /// and the current alternative constraint selected.
3366 : TargetLowering::ConstraintWeight
3367 1927 : TargetLowering::getMultipleConstraintMatchWeight(
3368 : AsmOperandInfo &info, int maIndex) const {
3369 : InlineAsm::ConstraintCodeVector *rCodes;
3370 3854 : if (maIndex >= (int)info.multipleAlternatives.size())
3371 77 : rCodes = &info.Codes;
3372 : else
3373 3700 : rCodes = &info.multipleAlternatives[maIndex].Codes;
3374 : ConstraintWeight BestWeight = CW_Invalid;
3375 :
3376 : // Loop over the options, keeping track of the most general one.
3377 5994 : for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
3378 : ConstraintWeight weight =
3379 4280 : getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
3380 2140 : if (weight > BestWeight)
3381 : BestWeight = weight;
3382 : }
3383 :
3384 1927 : return BestWeight;
3385 : }
3386 :
3387 : /// Examine constraint type and operand type and determine a weight value.
3388 : /// This object must already have been set up with the operand type
3389 : /// and the current alternative constraint selected.
3390 : TargetLowering::ConstraintWeight
3391 1516 : TargetLowering::getSingleConstraintMatchWeight(
3392 : AsmOperandInfo &info, const char *constraint) const {
3393 : ConstraintWeight weight = CW_Invalid;
3394 1516 : Value *CallOperandVal = info.CallOperandVal;
3395 : // If we don't have a value, we can't do a match,
3396 : // but allow it at the lowest weight.
3397 1516 : if (!CallOperandVal)
3398 : return CW_Default;
3399 : // Look at the constraint type.
3400 1426 : switch (*constraint) {
3401 111 : case 'i': // immediate integer.
3402 : case 'n': // immediate integer with a known value.
3403 111 : if (isa<ConstantInt>(CallOperandVal))
3404 : weight = CW_Constant;
3405 : break;
3406 0 : case 's': // non-explicit intregal immediate.
3407 : if (isa<GlobalValue>(CallOperandVal))
3408 : weight = CW_Constant;
3409 : break;
3410 0 : case 'E': // immediate float if host format.
3411 : case 'F': // immediate float.
3412 0 : if (isa<ConstantFP>(CallOperandVal))
3413 : weight = CW_Constant;
3414 : break;
3415 : case '<': // memory operand with autodecrement.
3416 : case '>': // memory operand with autoincrement.
3417 : case 'm': // memory operand.
3418 : case 'o': // offsettable memory operand
3419 : case 'V': // non-offsettable memory operand
3420 : weight = CW_Memory;
3421 : break;
3422 664 : case 'r': // general register.
3423 : case 'g': // general register, memory operand or immediate integer.
3424 : // note: Clang converts "g" to "imr".
3425 1328 : if (CallOperandVal->getType()->isIntegerTy())
3426 : weight = CW_Register;
3427 : break;
3428 221 : case 'X': // any operand.
3429 : default:
3430 : weight = CW_Default;
3431 221 : break;
3432 : }
3433 : return weight;
3434 : }
3435 :
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
      TLI.getConstraintType(OpInfo.Codes[i]);

    // If this is an 'other' constraint, see if the operand is valid for it.
    // For example, on X86 we might have an 'rI' constraint.  If the operand
    // is an integer in the range [0..31] we want to use I (saving a load
    // of a register), otherwise we must use 'r'.
    if (CType == TargetLowering::C_Other && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
                                       ResultOps, *DAG);
      // A non-empty result means the operand satisfies this 'other'
      // constraint; per rule 1 above, prefer it unconditionally.
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation.  This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}
3503 :
3504 : /// Determines the constraint code and constraint type to use for the specific
3505 : /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
3506 256814 : void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
3507 : SDValue Op,
3508 : SelectionDAG *DAG) const {
3509 : assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
3510 :
3511 : // Single-letter constraints ('r') are very common.
3512 513628 : if (OpInfo.Codes.size() == 1) {
3513 247046 : OpInfo.ConstraintCode = OpInfo.Codes[0];
3514 494092 : OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
3515 : } else {
3516 9768 : ChooseConstraint(OpInfo, *this, Op, DAG);
3517 : }
3518 :
3519 : // 'X' matches anything.
3520 256814 : if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
3521 : // Labels and constants are handled elsewhere ('X' is the only thing
3522 : // that matches labels). For Functions, the type here is the type of
3523 : // the result, which is not what we want to look at; leave them alone.
3524 : Value *v = OpInfo.CallOperandVal;
3525 266 : if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
3526 : OpInfo.CallOperandVal = v;
3527 : return;
3528 : }
3529 :
3530 : // Otherwise, try to resolve it to something we know about by looking at
3531 : // the actual operand type.
3532 388 : if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
3533 : OpInfo.ConstraintCode = Repl;
3534 382 : OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
3535 : }
3536 : }
3537 : }
3538 :
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // For each divisor element, split it into a shift amount (the power-of-two
  // part) and the odd remainder, whose multiplicative inverse we compute.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false; // Divisor of zero: abandon the whole transform.
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      // Shift out trailing zeros; an exact division lets us do this first.
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (VT.isVector()) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else {
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true); // Exact: the shift discards only zero bits.
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
3599 :
3600 454 : SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
3601 : SelectionDAG &DAG,
3602 : SmallVectorImpl<SDNode *> &Created) const {
3603 454 : AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3604 454 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3605 908 : if (TLI.isIntDivCheap(N->getValueType(0), Attr))
3606 6 : return SDValue(N,0); // Lower SDIV as SDIV
3607 448 : return SDValue();
3608 : }
3609 :
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  // For each divisor element, derive the magic multiplier, an optional
  // numerator add/subtract factor, the post-shift amount, and a mask used
  // to conditionally apply the final sign-bit fixup.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false; // Divisor of zero: abandon the transform.

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0; // No sign fixup needed for division by +/-1.
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (VT.isVector()) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else {
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  SDValue Q;
  if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
                          : isOperationLegalOrCustom(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
  else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
                               : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
    SDValue LoHi =
        DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
    Q = SDValue(LoHi.getNode(), 1); // Result 1 is the high half of the product.
  } else
    return SDValue(); // No mulhs or equivalent.
  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
3718 :
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT))
    return SDValue();

  bool UseNPQ = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  // For each divisor element, derive the pre-shift, magic multiplier,
  // NPQ-select factor and post-shift amounts.
  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false; // Divisor of zero: abandon the transform.
    // FIXME: We should use a narrower constant when the upper
    // bits are known to be zero.
    APInt Divisor = C->getAPIntValue();
    APInt::mu magics = Divisor.magicu();
    unsigned PreShift = 0, PostShift = 0;

    // If the divisor is even, we can avoid using the expensive fixup by
    // shifting the divided value upfront.
    if (magics.a != 0 && !Divisor[0]) {
      PreShift = Divisor.countTrailingZeros();
      // Get magic number for the shifted divisor.
      magics = Divisor.lshr(PreShift).magicu(PreShift);
      assert(magics.a == 0 && "Should use cheap fixup now");
    }

    APInt Magic = magics.m;

    // NOTE(review): used purely as a boolean flag; declared unsigned in this
    // revision.
    unsigned SelNPQ;
    if (magics.a == 0 || Divisor.isOneValue()) {
      assert(magics.s < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      PostShift = magics.s;
      SelNPQ = false;
    } else {
      // The magic multiply overflows; take the "add indicator" (NPQ) fixup
      // path, which needs one less bit of post-shift here.
      PostShift = magics.s - 1;
      SelNPQ = true;
    }

    PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
    MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
    NPQFactors.push_back(
        DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
                               : APInt::getNullValue(EltBits),
                        dl, SVT));
    PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
    UseNPQ |= SelNPQ;
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (VT.isVector()) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else {
    // NPQFactor is only used on the vector path; the scalar NPQ fixup below
    // uses an explicit SRL-by-1 instead.
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
  }

  SDValue Q = N0;
  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
  Created.push_back(Q.getNode());

  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
                            : isOperationLegalOrCustom(ISD::MULHU, VT))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
                            : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1); // High half of the product.
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
  Created.push_back(Q.getNode());

  // Division by one is the identity: select the original numerator for any
  // elements whose divisor is 1.
  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
3851 :
3852 68 : bool TargetLowering::
3853 : verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
3854 68 : if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3855 0 : DAG.getContext()->emitError("argument to '__builtin_return_address' must "
3856 : "be a constant integer");
3857 0 : return true;
3858 : }
3859 :
3860 : return false;
3861 : }
3862 :
3863 : //===----------------------------------------------------------------------===//
3864 : // Legalization Utilities
3865 : //===----------------------------------------------------------------------===//
3866 :
/// Expand a MUL, UMUL_LOHI or SMUL_LOHI of type VT into operations on the
/// half-width type HiLoVT (using whichever of MULHU/MULHS/UMUL_LOHI/SMUL_LOHI
/// is available there, or unconditionally when Kind == Always). On success the
/// pieces of the result are appended to Result, low part first, and true is
/// returned; false means no suitable half-width multiply exists. LL/LH/RL/RH,
/// when non-null, are caller-provided low/high halves of LHS/RHS; they must be
/// all set or all null (halves are derived here via TRUNCATE/SRL otherwise).
bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  // Determine which half-width multiply forms we may emit. When Kind is
  // Always, legality is not consulted (used during expansion of illegal
  // types, where the ops will be legalized later).
  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
  // Sign-bit counts are used below to detect operands that are really
  // sign-extended half-width values.
  unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
  unsigned RHSSB = DAG.ComputeNumSignBits(RHS);

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  // Emit a half-width widening multiply of L*R into Lo/Hi, preferring the
  // fused [SU]MUL_LOHI node over a separate MUL + MULH[SU] pair.
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  // Derive the low halves of the operands if the caller did not supply them.
  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  // Fast path: if the high halves of both operands are known zero, a single
  // unsigned half-width multiply gives the whole product.
  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        // [SU]MUL_LOHI callers expect four pieces; the top two are zero here.
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  // Fast path: operands that are really sign-extended half-width values can
  // use a single signed half-width multiply.
  if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
      RHSSB > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
    // FIXME getShiftAmountTy does not always return a sensible result when VT
    // is an illegal type, and so the type may be too small to fit the shift
    // amount. Override it with i32. The shift will have to be legalized.
    ShiftAmountTy = MVT::i32;
  }
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  // Derive the high halves of the operands if the caller did not supply them.
  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  // General schoolbook expansion: start with the LL*RL partial product.
  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    // Truncating multiply: only the low VT bits are needed, so the cross
    // partial products contribute just their low halves to the high part.
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  // Merge combines a (Lo, Hi) half pair back into one VT-wide value.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  // This add can overflow; capture the carry with ADDC so it can be folded
  // into the high partial product below.
  Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                     Merge(Lo, Hi));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  // The LH*RH partial product; signed for SMUL_LOHI so the top bits are right.
  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  // Propagate the carry from the middle sum into the high half via ADDE.
  Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                   Carry);
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    // Correct the unsigned partial products for negative operands: if a high
    // half is negative, subtract the other operand's (zero-extended) low half.
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  // Emit the two remaining half-width pieces of the double-width result.
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
4036 :
4037 2452 : bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
4038 : SelectionDAG &DAG, MulExpansionKind Kind,
4039 : SDValue LL, SDValue LH, SDValue RL,
4040 : SDValue RH) const {
4041 : SmallVector<SDValue, 2> Result;
4042 7356 : bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
4043 2452 : N->getOperand(0), N->getOperand(1), Result, HiLoVT,
4044 : DAG, Kind, LL, LH, RL, RH);
4045 2452 : if (Ok) {
4046 : assert(Result.size() == 2);
4047 2085 : Lo = Result[0];
4048 2085 : Hi = Result[1];
4049 : }
4050 2452 : return Ok;
4051 : }
4052 :
/// Expand an FP_TO_SINT node without using any floating-point operations, by
/// decoding the IEEE-754 representation with integer arithmetic. Currently
/// only handles f32 -> i64; returns false for any other type combination.
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  EVT VT = Node->getOperand(0).getValueType();
  EVT NVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (VT != MVT::f32 || NVT != MVT::i64)
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
  // IntVT is the integer type with the same width as the FP source (i32 for
  // f32), used to pick the float's bit pattern apart.
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits());
  // IEEE-754 single-precision field constants: 8 exponent bits at bit 23,
  // bias 127, 23 mantissa bits, sign in the top bit.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl,
                                     IntVT);
  SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float's bits as an integer.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));

  auto &DL = DAG.getDataLayout();
  // Extract the biased exponent field, then remove the bias.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL)));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Arithmetic-shift the sign bit across the whole word: Sign is all-ones for
  // a negative input and zero otherwise.
  SDValue Sign = DAG.getNode(
      ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
      DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL)));
  Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);

  // Recover the significand: mantissa bits with the implicit leading 1
  // (0x00800000) ORed back in.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, NVT);

  // Position the significand according to the exponent: shift left when the
  // exponent exceeds the mantissa width, right otherwise.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, NVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, getShiftAmountTy(IntVT, DL))),
      DAG.getNode(ISD::SRL, dl, NVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, getShiftAmountTy(IntVT, DL))),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
                            DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
                            Sign);

  // Magnitudes below 1.0 (negative unbiased exponent) convert to 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
  return true;
}
4115 :
4116 1767 : SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
4117 : SelectionDAG &DAG) const {
4118 : SDLoc SL(LD);
4119 1767 : SDValue Chain = LD->getChain();
4120 1767 : SDValue BasePTR = LD->getBasePtr();
4121 1767 : EVT SrcVT = LD->getMemoryVT();
4122 : ISD::LoadExtType ExtType = LD->getExtensionType();
4123 :
4124 : unsigned NumElem = SrcVT.getVectorNumElements();
4125 :
4126 1767 : EVT SrcEltVT = SrcVT.getScalarType();
4127 3534 : EVT DstEltVT = LD->getValueType(0).getScalarType();
4128 :
4129 1767 : unsigned Stride = SrcEltVT.getSizeInBits() / 8;
4130 : assert(SrcEltVT.isByteSized());
4131 :
4132 : SmallVector<SDValue, 8> Vals;
4133 : SmallVector<SDValue, 8> LoadChains;
4134 :
4135 9127 : for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
4136 : SDValue ScalarLoad =
4137 : DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
4138 7360 : LD->getPointerInfo().getWithOffset(Idx * Stride),
4139 7360 : SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
4140 14720 : LD->getMemOperand()->getFlags(), LD->getAAInfo());
4141 :
4142 7360 : BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
4143 :
4144 7360 : Vals.push_back(ScalarLoad.getValue(0));
4145 7360 : LoadChains.push_back(ScalarLoad.getValue(1));
4146 : }
4147 :
4148 1767 : SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
4149 3534 : SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
4150 :
4151 3534 : return DAG.getMergeValues({ Value, NewChain }, SL);
4152 : }
4153 :
/// Expand a vector store into per-element scalar operations. Byte-sized
/// elements become individual truncating stores; sub-byte elements are packed
/// into a single integer (honoring endianness) and stored at once, so the
/// in-memory layout has no padding between elements. Returns the new chain.
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    // Accumulator holding all elements packed so far.
    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      // Extract, truncate to the memory element width, and widen to the
      // packed integer type so it can be shifted into position.
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getConstant(Idx, SL, IdxVT));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      // On big-endian targets element 0 occupies the most-significant bits.
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    // One store of the fully packed integer.
    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getAlignment(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert (Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getConstant(Idx, SL, IdxVT));

    SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
        ST->getMemOperand()->getFlags(), ST->getAAInfo());

    Stores.push_back(Store);
  }

  // The element stores are independent; join their chains with a TokenFactor.
  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}
4227 :
/// Expand an unaligned load into operations the target can perform. Returns
/// the {value, chain} pair the original load produced. Strategy depends on
/// the type: FP/vector loads become an integer load + bitcast when the
/// equivalent integer type is legal (or are scalarized / bounced through an
/// aligned stack slot otherwise); integer loads are split into two
/// half-width loads that are shifted and ORed back together.
std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
        // scalarizeVectorLoad returns a MERGE_VALUES; unpack it either way.
        if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
          return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
        return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        // Widen from the in-memory type to the requested result type.
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks, rounding up for a partial tail.
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial. Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       MinAlign(LD->getAlignment(), Offset),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
    // Follow the load with a store to the stack slot. Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
  NumBits >>= 1;

  unsigned Alignment = LD->getAlignment();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts. Which half lives at the lower address
  // depends on endianness; the low part is always zero-extended so the OR
  // below cannot clobber the high bits.
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, MinAlign(Alignment, IncrementSize),
                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
  }

  // aggregate the two parts: (Hi << NumBits) | Lo.
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  return std::make_pair(Result, TF);
}
4382 :
/// Expand an unaligned store into operations the target can perform. Returns
/// the new chain. FP/vector stores become a bitcast + integer store when the
/// equivalent integer type is legal (or are scalarized / bounced through an
/// aligned stack slot otherwise); integer stores are split into two
/// half-width truncating stores.
SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  int Alignment = ST->getAlignment();
  auto &MF = DAG.getMachineFunction();
  EVT MemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (MemVT.isFloatingPoint() || MemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          MemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);

        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    EVT StoredVT = ST->getMemoryVT();
    MVT RegVT =
        getRegisterType(*DAG.getContext(),
                        EVT::getIntegerVT(*DAG.getContext(),
                                          StoredVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoredVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    // Number of register-width chunks, rounding up for a partial tail.
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location. Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    MinAlign(ST->getAlignment(), Offset),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial. Do a truncating store. On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), MemVT,
                          MinAlign(ST->getAlignment(), Offset),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(ST->getMemoryVT().isInteger() &&
         !ST->getMemoryVT().isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
  int NumBits = NewStoredVT.getSizeInBits();
  int IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts; which half goes to the lower address depends on
  // endianness.
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
  Alignment = MinAlign(Alignment, IncrementSize);
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}
4513 :
4514 : SDValue
4515 36 : TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
4516 : const SDLoc &DL, EVT DataVT,
4517 : SelectionDAG &DAG,
4518 : bool IsCompressedMemory) const {
4519 36 : SDValue Increment;
4520 36 : EVT AddrVT = Addr.getValueType();
4521 36 : EVT MaskVT = Mask.getValueType();
4522 : assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
4523 : "Incompatible types of Data and Mask");
4524 36 : if (IsCompressedMemory) {
4525 : // Incrementing the pointer according to number of '1's in the mask.
4526 8 : EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
4527 8 : SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
4528 8 : if (MaskIntVT.getSizeInBits() < 32) {
4529 8 : MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
4530 8 : MaskIntVT = MVT::i32;
4531 : }
4532 :
4533 : // Count '1's with POPCNT.
4534 8 : Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
4535 8 : Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
4536 : // Scale is an element size in bytes.
4537 8 : SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
4538 8 : AddrVT);
4539 8 : Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
4540 : } else
4541 28 : Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
4542 :
4543 36 : return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
4544 : }
4545 :
4546 2052 : static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
4547 : SDValue Idx,
4548 : EVT VecVT,
4549 : const SDLoc &dl) {
4550 : if (isa<ConstantSDNode>(Idx))
4551 994 : return Idx;
4552 :
4553 1058 : EVT IdxVT = Idx.getValueType();
4554 : unsigned NElts = VecVT.getVectorNumElements();
4555 : if (isPowerOf2_32(NElts)) {
4556 : APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
4557 1058 : Log2_32(NElts));
4558 : return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
4559 1058 : DAG.getConstant(Imm, dl, IdxVT));
4560 : }
4561 :
4562 : return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
4563 0 : DAG.getConstant(NElts - 1, dl, IdxVT));
4564 : }
4565 :
4566 2052 : SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
4567 : SDValue VecPtr, EVT VecVT,
4568 : SDValue Index) const {
4569 : SDLoc dl(Index);
4570 : // Make sure the index type is big enough to compute in.
4571 4104 : Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
4572 :
4573 2052 : EVT EltVT = VecVT.getVectorElementType();
4574 :
4575 : // Calculate the element offset and add it to the pointer.
4576 2052 : unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
4577 : assert(EltSize * 8 == EltVT.getSizeInBits() &&
4578 : "Converting bits to bytes lost precision");
4579 :
4580 2052 : Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
4581 :
4582 2052 : EVT IdxVT = Index.getValueType();
4583 :
4584 2052 : Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
4585 2052 : DAG.getConstant(EltSize, dl, IdxVT));
4586 2052 : return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
4587 : }
4588 :
4589 : //===----------------------------------------------------------------------===//
4590 : // Implementation of Emulated TLS Model
4591 : //===----------------------------------------------------------------------===//
4592 :
SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to the address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address(address of global variable named "__emutls_v.xyz")
  // The "__emutls_v.<name>" control variable is expected to already exist in
  // the module (the assert below enforces this).
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  // Build the single call argument: the address of "__emutls_v.<name>".
  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  // const_cast: getNamedGlobal needs a mutable Module; the lookup itself does
  // not modify it.
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  // Lower as an ordinary C call returning i8*, chained to the entry node.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
  // calls so frame lowering is set up accordingly.
  // At least for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  // Return the call's result value (the TLS variable's address); the chain
  // (CallResult.second) is discarded.
  return CallResult.first;
}
4629 :
4630 25 : SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
4631 : SelectionDAG &DAG) const {
4632 : assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
4633 25 : if (!isCtlzFast())
4634 0 : return SDValue();
4635 25 : ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4636 : SDLoc dl(Op);
4637 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
4638 46 : if (C->isNullValue() && CC == ISD::SETEQ) {
4639 7 : EVT VT = Op.getOperand(0).getValueType();
4640 7 : SDValue Zext = Op.getOperand(0);
4641 7 : if (VT.bitsLT(MVT::i32)) {
4642 0 : VT = MVT::i32;
4643 0 : Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
4644 : }
4645 7 : unsigned Log2b = Log2_32(VT.getSizeInBits());
4646 7 : SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
4647 : SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
4648 7 : DAG.getConstant(Log2b, dl, MVT::i32));
4649 7 : return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
4650 : }
4651 : }
4652 18 : return SDValue();
4653 : }
4654 :
4655 : SDValue
4656 14 : TargetLowering::getExpandedSignedSaturationAddition(SDNode *Node,
4657 : SelectionDAG &DAG) const {
4658 : assert(Node->getOpcode() == ISD::SADDSAT &&
4659 : "Expected method to receive SADDSAT node.");
4660 : assert(Node->getNumOperands() == 2 &&
4661 : "Expected SADDSAT node to have 2 operands.");
4662 :
4663 : SDLoc dl(Node);
4664 14 : SDValue LHS = Node->getOperand(0);
4665 14 : SDValue RHS = Node->getOperand(1);
4666 : assert(LHS.getValueType().isScalarInteger() &&
4667 : "Expected operands to be integers. Vector of int arguments should "
4668 : "already be unrolled.");
4669 : assert(RHS.getValueType().isScalarInteger() &&
4670 : "Expected operands to be integers. Vector of int arguments should "
4671 : "already be unrolled.");
4672 : assert(LHS.getValueType() == RHS.getValueType() &&
4673 : "Expected both operands of SADDSAT to be the same type");
4674 :
4675 14 : unsigned BitWidth = LHS.getValueSizeInBits();
4676 14 : EVT ResultType = LHS.getValueType();
4677 : EVT BoolVT =
4678 14 : getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType);
4679 : SDValue Result =
4680 14 : DAG.getNode(ISD::SADDO, dl, DAG.getVTList(ResultType, BoolVT), LHS, RHS);
4681 14 : SDValue Sum = Result.getValue(0);
4682 14 : SDValue Overflow = Result.getValue(1);
4683 :
4684 : // SatMax -> Overflow && Sum < 0
4685 : // SatMin -> Overflow && Sum > 0
4686 28 : SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
4687 :
4688 14 : SDValue SumNeg = DAG.getSetCC(dl, BoolVT, Sum, Zero, ISD::SETLT);
4689 14 : APInt MinVal = APInt::getSignedMinValue(BitWidth);
4690 14 : APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
4691 14 : SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
4692 14 : SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
4693 :
4694 14 : Result = DAG.getSelect(dl, ResultType, SumNeg, SatMax, SatMin);
4695 14 : return DAG.getSelect(dl, ResultType, Overflow, Result, Sum);
4696 : }
|