File: | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp |
Warning: | line 7564, column 5: Value stored to 'IsScaledIndex' is never read |
1 | //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This implements the TargetLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/CodeGen/TargetLowering.h" |
14 | #include "llvm/ADT/STLExtras.h" |
15 | #include "llvm/CodeGen/CallingConvLower.h" |
16 | #include "llvm/CodeGen/MachineFrameInfo.h" |
17 | #include "llvm/CodeGen/MachineFunction.h" |
18 | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
19 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
20 | #include "llvm/CodeGen/SelectionDAG.h" |
21 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
22 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
23 | #include "llvm/IR/DataLayout.h" |
24 | #include "llvm/IR/DerivedTypes.h" |
25 | #include "llvm/IR/GlobalVariable.h" |
26 | #include "llvm/IR/LLVMContext.h" |
27 | #include "llvm/MC/MCAsmInfo.h" |
28 | #include "llvm/MC/MCExpr.h" |
29 | #include "llvm/Support/ErrorHandling.h" |
30 | #include "llvm/Support/KnownBits.h" |
31 | #include "llvm/Support/MathExtras.h" |
32 | #include "llvm/Target/TargetLoweringObjectFile.h" |
33 | #include "llvm/Target/TargetMachine.h" |
34 | #include <cctype> |
35 | using namespace llvm; |
36 | |
37 | /// NOTE: The TargetMachine owns TLOF. |
38 | TargetLowering::TargetLowering(const TargetMachine &tm) |
39 | : TargetLoweringBase(tm) {} |
40 | |
41 | const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { |
42 | return nullptr; |
43 | } |
44 | |
45 | bool TargetLowering::isPositionIndependent() const { |
46 | return getTargetMachine().isPositionIndependent(); |
47 | } |
48 | |
49 | /// Check whether a given call node is in tail position within its function. If |
50 | /// so, set Chain to the input chain of the tail call. |
51 | bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, |
52 | SDValue &Chain) const { |
53 | const Function &F = DAG.getMachineFunction().getFunction(); |
54 | |
55 | // First, check if tail calls have been disabled in this function. |
56 | if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") |
57 | return false; |
58 | |
59 | // Conservatively require the attributes of the call to match those of |
60 | // the return. Ignore NoAlias and NonNull because they don't affect the |
61 | // call sequence. |
62 | AttributeList CallerAttrs = F.getAttributes(); |
63 | if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) |
64 | .removeAttribute(Attribute::NoAlias) |
65 | .removeAttribute(Attribute::NonNull) |
66 | .hasAttributes()) |
67 | return false; |
68 | |
69 | // It's not safe to eliminate the sign / zero extension of the return value. |
70 | if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) || |
71 | CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) |
72 | return false; |
73 | |
74 | // Check if the only use is a function return node. |
75 | return isUsedByReturnOnly(Node, Chain); |
76 | } |
77 | |
78 | bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, |
79 | const uint32_t *CallerPreservedMask, |
80 | const SmallVectorImpl<CCValAssign> &ArgLocs, |
81 | const SmallVectorImpl<SDValue> &OutVals) const { |
82 | for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { |
83 | const CCValAssign &ArgLoc = ArgLocs[I]; |
84 | if (!ArgLoc.isRegLoc()) |
85 | continue; |
86 | MCRegister Reg = ArgLoc.getLocReg(); |
87 | // Only look at callee saved registers. |
88 | if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) |
89 | continue; |
90 | // Check that we pass the value used for the caller. |
91 | // (We look for a CopyFromReg reading a virtual register that is used |
92 | // for the function live-in value of register Reg) |
93 | SDValue Value = OutVals[I]; |
94 | if (Value->getOpcode() != ISD::CopyFromReg) |
95 | return false; |
96 | Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); |
97 | if (MRI.getLiveInPhysReg(ArgReg) != Reg) |
98 | return false; |
99 | } |
100 | return true; |
101 | } |
102 | |
103 | /// Set CallLoweringInfo attribute flags based on a call instruction |
104 | /// and called function attributes. |
105 | void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, |
106 | unsigned ArgIdx) { |
107 | IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt); |
108 | IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt); |
109 | IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg); |
110 | IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet); |
111 | IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest); |
112 | IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal); |
113 | IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated); |
114 | IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca); |
115 | IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned); |
116 | IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); |
117 | IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); |
118 | Alignment = Call->getParamAlign(ArgIdx); |
119 | ByValType = nullptr; |
120 | if (IsByVal) |
121 | ByValType = Call->getParamByValType(ArgIdx); |
122 | PreallocatedType = nullptr; |
123 | if (IsPreallocated) |
124 | PreallocatedType = Call->getParamPreallocatedType(ArgIdx); |
125 | } |
126 | |
127 | /// Generate a libcall taking the given operands as arguments and returning a |
128 | /// result of type RetVT. |
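| /// Illustration (a hedged sketch, not from this file): softening an f32 FADD |
| /// would call this with LC = RTLIB::ADD_F32, RetVT = MVT::f32 and |
| /// Ops = {LHS, RHS}, receiving the call result and the updated chain. |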
129 | std::pair<SDValue, SDValue> |
130 | TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, |
131 | ArrayRef<SDValue> Ops, |
132 | MakeLibCallOptions CallOptions, |
133 | const SDLoc &dl, |
134 | SDValue InChain) const { |
135 | if (!InChain) |
136 | InChain = DAG.getEntryNode(); |
137 | |
138 | TargetLowering::ArgListTy Args; |
139 | Args.reserve(Ops.size()); |
140 | |
141 | TargetLowering::ArgListEntry Entry; |
142 | for (unsigned i = 0; i < Ops.size(); ++i) { |
143 | SDValue NewOp = Ops[i]; |
144 | Entry.Node = NewOp; |
145 | Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); |
146 | Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), |
147 | CallOptions.IsSExt); |
148 | Entry.IsZExt = !Entry.IsSExt; |
149 | |
150 | if (CallOptions.IsSoften && |
151 | !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) { |
152 | Entry.IsSExt = Entry.IsZExt = false; |
153 | } |
154 | Args.push_back(Entry); |
155 | } |
156 | |
157 | if (LC == RTLIB::UNKNOWN_LIBCALL) |
158 | report_fatal_error("Unsupported library call operation!"); |
159 | SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), |
160 | getPointerTy(DAG.getDataLayout())); |
161 | |
162 | Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); |
163 | TargetLowering::CallLoweringInfo CLI(DAG); |
164 | bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt); |
165 | bool zeroExtend = !signExtend; |
166 | |
167 | if (CallOptions.IsSoften && |
168 | !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) { |
169 | signExtend = zeroExtend = false; |
170 | } |
171 | |
172 | CLI.setDebugLoc(dl) |
173 | .setChain(InChain) |
174 | .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) |
175 | .setNoReturn(CallOptions.DoesNotReturn) |
176 | .setDiscardResult(!CallOptions.IsReturnValueUsed) |
177 | .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) |
178 | .setSExtResult(signExtend) |
179 | .setZExtResult(zeroExtend); |
180 | return LowerCallTo(CLI); |
181 | } |
182 | |
183 | bool TargetLowering::findOptimalMemOpLowering( |
184 | std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, |
185 | unsigned SrcAS, const AttributeList &FuncAttributes) const { |
186 | if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) |
187 | return false; |
188 | |
189 | EVT VT = getOptimalMemOpType(Op, FuncAttributes); |
190 | |
191 | if (VT == MVT::Other) { |
192 | // Use the largest integer type whose alignment constraints are satisfied. |
193 | // We only need to check DstAlign here as SrcAlign is always greater than |
194 | // or equal to DstAlign (or zero). |
195 | VT = MVT::i64; |
196 | if (Op.isFixedDstAlign()) |
197 | while ( |
198 | Op.getDstAlign() < (VT.getSizeInBits() / 8) && |
199 | !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value())) |
200 | VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); |
201 | assert(VT.isInteger()); |
202 | |
203 | // Find the largest legal integer type. |
204 | MVT LVT = MVT::i64; |
205 | while (!isTypeLegal(LVT)) |
206 | LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); |
207 | assert(LVT.isInteger()); |
208 | |
209 | // If the type we've chosen is larger than the largest legal integer type |
210 | // then use that instead. |
211 | if (VT.bitsGT(LVT)) |
212 | VT = LVT; |
213 | } |
214 | |
215 | unsigned NumMemOps = 0; |
216 | uint64_t Size = Op.size(); |
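| // Illustrative example: a 15-byte copy with VT == i64 emits one i64 op, then |
| // the inner loop below shrinks VT to i32, i16 and i8 for the remaining 7 |
| // bytes, unless an overlapping unaligned op is allowed and fast. |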
217 | while (Size) { |
218 | unsigned VTSize = VT.getSizeInBits() / 8; |
219 | while (VTSize > Size) { |
220 | // For now, only use non-vector loads / stores for the left-over pieces. |
221 | EVT NewVT = VT; |
222 | unsigned NewVTSize; |
223 | |
224 | bool Found = false; |
225 | if (VT.isVector() || VT.isFloatingPoint()) { |
226 | NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; |
227 | if (isOperationLegalOrCustom(ISD::STORE, NewVT) && |
228 | isSafeMemOpType(NewVT.getSimpleVT())) |
229 | Found = true; |
230 | else if (NewVT == MVT::i64 && |
231 | isOperationLegalOrCustom(ISD::STORE, MVT::f64) && |
232 | isSafeMemOpType(MVT::f64)) { |
233 | // i64 is usually not legal on 32-bit targets, but f64 may be. |
234 | NewVT = MVT::f64; |
235 | Found = true; |
236 | } |
237 | } |
238 | |
239 | if (!Found) { |
240 | do { |
241 | NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); |
242 | if (NewVT == MVT::i8) |
243 | break; |
244 | } while (!isSafeMemOpType(NewVT.getSimpleVT())); |
245 | } |
246 | NewVTSize = NewVT.getSizeInBits() / 8; |
247 | |
248 | // If the new VT cannot cover all of the remaining bits, then consider |
249 | // issuing a (or a pair of) unaligned and overlapping load / store. |
250 | bool Fast; |
251 | if (NumMemOps && Op.allowOverlap() && NewVTSize < Size && |
252 | allowsMisalignedMemoryAccesses( |
253 | VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 1, |
254 | MachineMemOperand::MONone, &Fast) && |
255 | Fast) |
256 | VTSize = Size; |
257 | else { |
258 | VT = NewVT; |
259 | VTSize = NewVTSize; |
260 | } |
261 | } |
262 | |
263 | if (++NumMemOps > Limit) |
264 | return false; |
265 | |
266 | MemOps.push_back(VT); |
267 | Size -= VTSize; |
268 | } |
269 | |
270 | return true; |
271 | } |
272 | |
273 | /// Soften the operands of a comparison. This code is shared among BR_CC, |
274 | /// SELECT_CC, and SETCC handlers. |
275 | void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, |
276 | SDValue &NewLHS, SDValue &NewRHS, |
277 | ISD::CondCode &CCCode, |
278 | const SDLoc &dl, const SDValue OldLHS, |
279 | const SDValue OldRHS) const { |
280 | SDValue Chain; |
281 | return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS, |
282 | OldRHS, Chain); |
283 | } |
284 | |
285 | void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, |
286 | SDValue &NewLHS, SDValue &NewRHS, |
287 | ISD::CondCode &CCCode, |
288 | const SDLoc &dl, const SDValue OldLHS, |
289 | const SDValue OldRHS, |
290 | SDValue &Chain, |
291 | bool IsSignaling) const { |
292 | // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc |
293 | // not supporting it. We can update this code when libgcc provides such |
294 | // functions. |
295 | |
296 | assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) |
297 |        && "Unsupported setcc type!"); |
298 | |
299 | // Expand into one or more soft-fp libcall(s). |
300 | RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; |
301 | bool ShouldInvertCC = false; |
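| // Most predicates need a single libcall whose result is compared against |
| // zero. SETUEQ expands to unord(x,y) || oeq(x,y); SETONE is the inverted |
| // pair combined with AND (ordered && not-equal). See the LC2 handling below. |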
302 | switch (CCCode) { |
303 | case ISD::SETEQ: |
304 | case ISD::SETOEQ: |
305 | LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : |
306 | (VT == MVT::f64) ? RTLIB::OEQ_F64 : |
307 | (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; |
308 | break; |
309 | case ISD::SETNE: |
310 | case ISD::SETUNE: |
311 | LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : |
312 | (VT == MVT::f64) ? RTLIB::UNE_F64 : |
313 | (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128; |
314 | break; |
315 | case ISD::SETGE: |
316 | case ISD::SETOGE: |
317 | LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : |
318 | (VT == MVT::f64) ? RTLIB::OGE_F64 : |
319 | (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128; |
320 | break; |
321 | case ISD::SETLT: |
322 | case ISD::SETOLT: |
323 | LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : |
324 | (VT == MVT::f64) ? RTLIB::OLT_F64 : |
325 | (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; |
326 | break; |
327 | case ISD::SETLE: |
328 | case ISD::SETOLE: |
329 | LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : |
330 | (VT == MVT::f64) ? RTLIB::OLE_F64 : |
331 | (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128; |
332 | break; |
333 | case ISD::SETGT: |
334 | case ISD::SETOGT: |
335 | LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : |
336 | (VT == MVT::f64) ? RTLIB::OGT_F64 : |
337 | (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; |
338 | break; |
339 | case ISD::SETO: |
340 | ShouldInvertCC = true; |
341 | LLVM_FALLTHROUGH; |
342 | case ISD::SETUO: |
343 | LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : |
344 | (VT == MVT::f64) ? RTLIB::UO_F64 : |
345 | (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; |
346 | break; |
347 | case ISD::SETONE: |
348 | // SETONE = O && UNE |
349 | ShouldInvertCC = true; |
350 | LLVM_FALLTHROUGH; |
351 | case ISD::SETUEQ: |
352 | LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : |
353 | (VT == MVT::f64) ? RTLIB::UO_F64 : |
354 | (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; |
355 | LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : |
356 | (VT == MVT::f64) ? RTLIB::OEQ_F64 : |
357 | (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; |
358 | break; |
359 | default: |
360 | // Invert CC for unordered comparisons |
361 | ShouldInvertCC = true; |
362 | switch (CCCode) { |
363 | case ISD::SETULT: |
364 | LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : |
365 | (VT == MVT::f64) ? RTLIB::OGE_F64 : |
366 | (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128; |
367 | break; |
368 | case ISD::SETULE: |
369 | LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : |
370 | (VT == MVT::f64) ? RTLIB::OGT_F64 : |
371 | (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; |
372 | break; |
373 | case ISD::SETUGT: |
374 | LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : |
375 | (VT == MVT::f64) ? RTLIB::OLE_F64 : |
376 | (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128; |
377 | break; |
378 | case ISD::SETUGE: |
379 | LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : |
380 | (VT == MVT::f64) ? RTLIB::OLT_F64 : |
381 | (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; |
382 | break; |
383 | default: llvm_unreachable("Do not know how to soften this setcc!"); |
384 | } |
385 | } |
386 | |
387 | // Use the target-specific return value for comparison libcalls. |
388 | EVT RetVT = getCmpLibcallReturnType(); |
389 | SDValue Ops[2] = {NewLHS, NewRHS}; |
390 | TargetLowering::MakeLibCallOptions CallOptions; |
391 | EVT OpsVT[2] = { OldLHS.getValueType(), |
392 | OldRHS.getValueType() }; |
393 | CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); |
394 | auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); |
395 | NewLHS = Call.first; |
396 | NewRHS = DAG.getConstant(0, dl, RetVT); |
397 | |
398 | CCCode = getCmpLibcallCC(LC1); |
399 | if (ShouldInvertCC) { |
400 | assert(RetVT.isInteger()); |
401 | CCCode = getSetCCInverse(CCCode, RetVT); |
402 | } |
403 | |
404 | if (LC2 == RTLIB::UNKNOWN_LIBCALL) { |
405 | // Update Chain. |
406 | Chain = Call.second; |
407 | } else { |
408 | EVT SetCCVT = |
409 | getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT); |
410 | SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode); |
411 | auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain); |
412 | CCCode = getCmpLibcallCC(LC2); |
413 | if (ShouldInvertCC) |
414 | CCCode = getSetCCInverse(CCCode, RetVT); |
415 | NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode); |
416 | if (Chain) |
417 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second, |
418 | Call2.second); |
419 | NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl, |
420 | Tmp.getValueType(), Tmp, NewLHS); |
421 | NewRHS = SDValue(); |
422 | } |
423 | } |
424 | |
425 | /// Return the entry encoding for a jump table in the current function. The |
426 | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. |
427 | unsigned TargetLowering::getJumpTableEncoding() const { |
428 | // In non-pic modes, just use the address of a block. |
429 | if (!isPositionIndependent()) |
430 | return MachineJumpTableInfo::EK_BlockAddress; |
431 | |
432 | // In PIC mode, if the target supports a GPRel32 directive, use it. |
433 | if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr) |
434 | return MachineJumpTableInfo::EK_GPRel32BlockAddress; |
435 | |
436 | // Otherwise, use a label difference. |
437 | return MachineJumpTableInfo::EK_LabelDifference32; |
438 | } |
439 | |
440 | SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, |
441 | SelectionDAG &DAG) const { |
442 | // If our PIC model is GP relative, use the global offset table as the base. |
443 | unsigned JTEncoding = getJumpTableEncoding(); |
444 | |
445 | if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || |
446 | (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) |
447 | return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout())); |
448 | |
449 | return Table; |
450 | } |
451 | |
452 | /// This returns the relocation base for the given PIC jumptable, the same as |
453 | /// getPICJumpTableRelocBase, but as an MCExpr. |
454 | const MCExpr * |
455 | TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
456 | unsigned JTI,MCContext &Ctx) const{ |
457 | // The normal PIC reloc base is the label at the start of the jump table. |
458 | return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx); |
459 | } |
460 | |
461 | bool |
462 | TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { |
463 | const TargetMachine &TM = getTargetMachine(); |
464 | const GlobalValue *GV = GA->getGlobal(); |
465 | |
466 | // If the address is not even local to this DSO we will have to load it from |
467 | // the GOT and then add the offset. |
468 | if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) |
469 | return false; |
470 | |
471 | // If the code is position independent we will have to add a base register. |
472 | if (isPositionIndependent()) |
473 | return false; |
474 | |
475 | // Otherwise we can do it. |
476 | return true; |
477 | } |
478 | |
479 | //===----------------------------------------------------------------------===// |
480 | // Optimization Methods |
481 | //===----------------------------------------------------------------------===// |
482 | |
483 | /// If the specified instruction has a constant integer operand and there are |
484 | /// bits set in that constant that are not demanded, then clear those bits and |
485 | /// return true. |
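| /// For example, (and X, 0xFF) when only the low nibble is demanded is |
| /// rewritten to (and X, 0xF). |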
486 | bool TargetLowering::ShrinkDemandedConstant(SDValue Op, |
487 | const APInt &DemandedBits, |
488 | const APInt &DemandedElts, |
489 | TargetLoweringOpt &TLO) const { |
490 | SDLoc DL(Op); |
491 | unsigned Opcode = Op.getOpcode(); |
492 | |
493 | // Do target-specific constant optimization. |
494 | if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) |
495 | return TLO.New.getNode(); |
496 | |
497 | // FIXME: ISD::SELECT, ISD::SELECT_CC |
498 | switch (Opcode) { |
499 | default: |
500 | break; |
501 | case ISD::XOR: |
502 | case ISD::AND: |
503 | case ISD::OR: { |
504 | auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
505 | if (!Op1C) |
506 | return false; |
507 | |
508 | // If this is a 'not' op, don't touch it because that's a canonical form. |
509 | const APInt &C = Op1C->getAPIntValue(); |
510 | if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C)) |
511 | return false; |
512 | |
513 | if (!C.isSubsetOf(DemandedBits)) { |
514 | EVT VT = Op.getValueType(); |
515 | SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT); |
516 | SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); |
517 | return TLO.CombineTo(Op, NewOp); |
518 | } |
519 | |
520 | break; |
521 | } |
522 | } |
523 | |
524 | return false; |
525 | } |
526 | |
527 | bool TargetLowering::ShrinkDemandedConstant(SDValue Op, |
528 | const APInt &DemandedBits, |
529 | TargetLoweringOpt &TLO) const { |
530 | EVT VT = Op.getValueType(); |
531 | APInt DemandedElts = VT.isVector() |
532 | ? APInt::getAllOnesValue(VT.getVectorNumElements()) |
533 | : APInt(1, 1); |
534 | return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO); |
535 | } |
536 | |
537 | /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. |
538 | /// This checks isZExtFree and widens with ANY_EXTEND (only the demanded low |
539 | /// bits survive), but it could be generalized for targets with other types of implicit widening casts. |
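| /// e.g. (i32 (add X, Y)) with only the low 8 bits demanded can become |
| /// (i32 (any_extend (i8 (add (trunc X), (trunc Y))))) when the casts are free. |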
540 | bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, |
541 | const APInt &Demanded, |
542 | TargetLoweringOpt &TLO) const { |
543 | assert(Op.getNumOperands() == 2 && |
544 |        "ShrinkDemandedOp only supports binary operators!"); |
545 | assert(Op.getNode()->getNumValues() == 1 && |
546 |        "ShrinkDemandedOp only supports nodes with one result!"); |
547 | |
548 | SelectionDAG &DAG = TLO.DAG; |
549 | SDLoc dl(Op); |
550 | |
551 | // Early return, as this function cannot handle vector types. |
552 | if (Op.getValueType().isVector()) |
553 | return false; |
554 | |
555 | // Don't do this if the node has another user, which may require the |
556 | // full value. |
557 | if (!Op.getNode()->hasOneUse()) |
558 | return false; |
559 | |
560 | // Search for the smallest integer type with free casts to and from |
561 | // Op's type. For expedience, just check power-of-2 integer types. |
562 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
563 | unsigned DemandedSize = Demanded.getActiveBits(); |
564 | unsigned SmallVTBits = DemandedSize; |
565 | if (!isPowerOf2_32(SmallVTBits)) |
566 | SmallVTBits = NextPowerOf2(SmallVTBits); |
567 | for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { |
568 | EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits); |
569 | if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && |
570 | TLI.isZExtFree(SmallVT, Op.getValueType())) { |
571 | // We found a type with free casts. |
572 | SDValue X = DAG.getNode( |
573 | Op.getOpcode(), dl, SmallVT, |
574 | DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), |
575 | DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1))); |
576 | assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?"); |
577 | SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X); |
578 | return TLO.CombineTo(Op, Z); |
579 | } |
580 | } |
581 | return false; |
582 | } |
583 | |
584 | bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
585 | DAGCombinerInfo &DCI) const { |
586 | SelectionDAG &DAG = DCI.DAG; |
587 | TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), |
588 | !DCI.isBeforeLegalizeOps()); |
589 | KnownBits Known; |
590 | |
591 | bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO); |
592 | if (Simplified) { |
593 | DCI.AddToWorklist(Op.getNode()); |
594 | DCI.CommitTargetLoweringOpt(TLO); |
595 | } |
596 | return Simplified; |
597 | } |
598 | |
599 | bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
600 | KnownBits &Known, |
601 | TargetLoweringOpt &TLO, |
602 | unsigned Depth, |
603 | bool AssumeSingleUse) const { |
604 | EVT VT = Op.getValueType(); |
605 | |
606 | // TODO: We can probably do more work on calculating the known bits and |
607 | // simplifying the operations for scalable vectors, but for now we just |
608 | // bail out. |
609 | if (VT.isScalableVector()) { |
610 | // Pretend we don't know anything for now. |
611 | Known = KnownBits(DemandedBits.getBitWidth()); |
612 | return false; |
613 | } |
614 | |
615 | APInt DemandedElts = VT.isVector() |
616 | ? APInt::getAllOnesValue(VT.getVectorNumElements()) |
617 | : APInt(1, 1); |
618 | return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth, |
619 | AssumeSingleUse); |
620 | } |
621 | |
622 | // TODO: Can we merge SelectionDAG::GetDemandedBits into this? |
623 | // TODO: Under what circumstances can we create nodes? Constant folding? |
624 | SDValue TargetLowering::SimplifyMultipleUseDemandedBits( |
625 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
626 | SelectionDAG &DAG, unsigned Depth) const { |
627 | // Limit search depth. |
628 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
629 | return SDValue(); |
630 | |
631 | // Ignore UNDEFs. |
632 | if (Op.isUndef()) |
633 | return SDValue(); |
634 | |
635 | // Not demanding any bits/elts from Op. |
636 | if (DemandedBits == 0 || DemandedElts == 0) |
637 | return DAG.getUNDEF(Op.getValueType()); |
638 | |
639 | unsigned NumElts = DemandedElts.getBitWidth(); |
640 | unsigned BitWidth = DemandedBits.getBitWidth(); |
641 | KnownBits LHSKnown, RHSKnown; |
642 | switch (Op.getOpcode()) { |
643 | case ISD::BITCAST: { |
644 | SDValue Src = peekThroughBitcasts(Op.getOperand(0)); |
645 | EVT SrcVT = Src.getValueType(); |
646 | EVT DstVT = Op.getValueType(); |
647 | if (SrcVT == DstVT) |
648 | return Src; |
649 | |
650 | unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); |
651 | unsigned NumDstEltBits = DstVT.getScalarSizeInBits(); |
652 | if (NumSrcEltBits == NumDstEltBits) |
653 | if (SDValue V = SimplifyMultipleUseDemandedBits( |
654 | Src, DemandedBits, DemandedElts, DAG, Depth + 1)) |
655 | return DAG.getBitcast(DstVT, V); |
656 | |
657 | // TODO - bigendian once we have test coverage. |
658 | if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && |
659 | DAG.getDataLayout().isLittleEndian()) { |
660 | unsigned Scale = NumDstEltBits / NumSrcEltBits; |
661 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
662 | APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); |
663 | APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); |
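| // Each wide dst element spans Scale consecutive narrow src elements, so map |
| // the demanded dst bit ranges and elements onto that src layout. |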
664 | for (unsigned i = 0; i != Scale; ++i) { |
665 | unsigned Offset = i * NumSrcEltBits; |
666 | APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); |
667 | if (!Sub.isNullValue()) { |
668 | DemandedSrcBits |= Sub; |
669 | for (unsigned j = 0; j != NumElts; ++j) |
670 | if (DemandedElts[j]) |
671 | DemandedSrcElts.setBit((j * Scale) + i); |
672 | } |
673 | } |
674 | |
675 | if (SDValue V = SimplifyMultipleUseDemandedBits( |
676 | Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) |
677 | return DAG.getBitcast(DstVT, V); |
678 | } |
679 | |
680 | // TODO - bigendian once we have test coverage. |
681 | if ((NumSrcEltBits % NumDstEltBits) == 0 && |
682 | DAG.getDataLayout().isLittleEndian()) { |
683 | unsigned Scale = NumSrcEltBits / NumDstEltBits; |
684 | unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; |
685 | APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); |
686 | APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); |
687 | for (unsigned i = 0; i != NumElts; ++i) |
688 | if (DemandedElts[i]) { |
689 | unsigned Offset = (i % Scale) * NumDstEltBits; |
690 | DemandedSrcBits.insertBits(DemandedBits, Offset); |
691 | DemandedSrcElts.setBit(i / Scale); |
692 | } |
693 | |
694 | if (SDValue V = SimplifyMultipleUseDemandedBits( |
695 | Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) |
696 | return DAG.getBitcast(DstVT, V); |
697 | } |
698 | |
699 | break; |
700 | } |
701 | case ISD::AND: { |
702 | LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
703 | RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
704 | |
705 | // If all of the demanded bits are known 1 on one side, return the other. |
706 | // These bits cannot contribute to the result of the 'and' in this |
707 | // context. |
708 | if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) |
709 | return Op.getOperand(0); |
710 | if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) |
711 | return Op.getOperand(1); |
712 | break; |
713 | } |
714 | case ISD::OR: { |
715 | LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
716 | RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
717 | |
718 | // If all of the demanded bits are known zero on one side, return the |
719 | // other. These bits cannot contribute to the result of the 'or' in this |
720 | // context. |
721 | if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) |
722 | return Op.getOperand(0); |
723 | if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) |
724 | return Op.getOperand(1); |
725 | break; |
726 | } |
727 | case ISD::XOR: { |
728 | LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
729 | RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
730 | |
731 | // If all of the demanded bits are known zero on one side, return the |
732 | // other. |
733 | if (DemandedBits.isSubsetOf(RHSKnown.Zero)) |
734 | return Op.getOperand(0); |
735 | if (DemandedBits.isSubsetOf(LHSKnown.Zero)) |
736 | return Op.getOperand(1); |
737 | break; |
738 | } |
739 | case ISD::SHL: { |
740 | // If we are only demanding sign bits then we can use the shift source |
741 | // directly. |
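| // If X has NumSignBits sign bits, (shl X, C) still has NumSignBits - C of |
| // them, so when only the top UpperDemandedBits bits are demanded and they |
| // are all still sign bits, the shift does not change the demanded bits. |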
742 | if (const APInt *MaxSA = |
743 | DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) { |
744 | SDValue Op0 = Op.getOperand(0); |
745 | unsigned ShAmt = MaxSA->getZExtValue(); |
746 | unsigned NumSignBits = |
747 | DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); |
748 | unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); |
749 | if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits)) |
750 | return Op0; |
751 | } |
752 | break; |
753 | } |
754 | case ISD::SETCC: { |
755 | SDValue Op0 = Op.getOperand(0); |
756 | SDValue Op1 = Op.getOperand(1); |
757 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); |
758 | // If (1) we only need the sign-bit, (2) the setcc operands are the same |
759 | // width as the setcc result, and (3) the result of a setcc conforms to 0 or |
760 | // -1, we may be able to bypass the setcc. |
761 | if (DemandedBits.isSignMask() && |
762 | Op0.getScalarValueSizeInBits() == BitWidth && |
763 | getBooleanContents(Op0.getValueType()) == |
764 | BooleanContent::ZeroOrNegativeOneBooleanContent) { |
765 | // If we're testing X < 0, then this compare isn't needed - just use X! |
766 | // FIXME: We're limiting to integer types here, but this should also work |
767 | // if we don't care about FP signed-zero. The use of SETLT with FP means |
768 | // that we don't care about NaNs. |
769 | if (CC == ISD::SETLT && Op1.getValueType().isInteger() && |
770 | (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) |
771 | return Op0; |
772 | } |
773 | break; |
774 | } |
775 | case ISD::SIGN_EXTEND_INREG: { |
776 | // If none of the extended bits are demanded, eliminate the sextinreg. |
777 | SDValue Op0 = Op.getOperand(0); |
778 | EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); |
779 | unsigned ExBits = ExVT.getScalarSizeInBits(); |
780 | if (DemandedBits.getActiveBits() <= ExBits) |
781 | return Op0; |
782 | // If the input is already sign extended, just drop the extension. |
783 | unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); |
784 | if (NumSignBits >= (BitWidth - ExBits + 1)) |
785 | return Op0; |
786 | break; |
787 | } |
788 | case ISD::ANY_EXTEND_VECTOR_INREG: |
789 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
790 | case ISD::ZERO_EXTEND_VECTOR_INREG: { |
791 | // If we only want the lowest element and none of the extended bits, then we can |
792 | // return the bitcasted source vector. |
793 | SDValue Src = Op.getOperand(0); |
794 | EVT SrcVT = Src.getValueType(); |
795 | EVT DstVT = Op.getValueType(); |
796 | if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() && |
797 | DAG.getDataLayout().isLittleEndian() && |
798 | DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) { |
799 | return DAG.getBitcast(DstVT, Src); |
800 | } |
801 | break; |
802 | } |
803 | case ISD::INSERT_VECTOR_ELT: { |
804 | // If we don't demand the inserted element, return the base vector. |
805 | SDValue Vec = Op.getOperand(0); |
806 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
807 | EVT VecVT = Vec.getValueType(); |
808 | if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && |
809 | !DemandedElts[CIdx->getZExtValue()]) |
810 | return Vec; |
811 | break; |
812 | } |
813 | case ISD::INSERT_SUBVECTOR: { |
814 | // If we don't demand the inserted subvector, return the base vector. |
815 | SDValue Vec = Op.getOperand(0); |
816 | SDValue Sub = Op.getOperand(1); |
817 | uint64_t Idx = Op.getConstantOperandVal(2); |
818 | unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); |
819 | if (DemandedElts.extractBits(NumSubElts, Idx) == 0) |
820 | return Vec; |
821 | break; |
822 | } |
823 | case ISD::VECTOR_SHUFFLE: { |
824 | ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); |
825 | |
826 | // If all the demanded elts are from one operand and are inline, |
827 | // then we can use the operand directly. |
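| // IdentityLHS holds if every demanded lane i reads lane i of operand 0; |
| // IdentityRHS if it reads lane i of operand 1 (mask value NumElts + i). |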
828 | bool AllUndef = true, IdentityLHS = true, IdentityRHS = true; |
829 | for (unsigned i = 0; i != NumElts; ++i) { |
830 | int M = ShuffleMask[i]; |
831 | if (M < 0 || !DemandedElts[i]) |
832 | continue; |
833 | AllUndef = false; |
834 | IdentityLHS &= (M == (int)i); |
835 | IdentityRHS &= ((M - NumElts) == i); |
836 | } |
837 | |
838 | if (AllUndef) |
839 | return DAG.getUNDEF(Op.getValueType()); |
840 | if (IdentityLHS) |
841 | return Op.getOperand(0); |
842 | if (IdentityRHS) |
843 | return Op.getOperand(1); |
844 | break; |
845 | } |
846 | default: |
847 | if (Op.getOpcode() >= ISD::BUILTIN_OP_END) |
848 | if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode( |
849 | Op, DemandedBits, DemandedElts, DAG, Depth)) |
850 | return V; |
851 | break; |
852 | } |
853 | return SDValue(); |
854 | } |
855 | |
856 | SDValue TargetLowering::SimplifyMultipleUseDemandedBits( |
857 | SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG, |
858 | unsigned Depth) const { |
859 | EVT VT = Op.getValueType(); |
860 | APInt DemandedElts = VT.isVector() |
861 | ? APInt::getAllOnesValue(VT.getVectorNumElements()) |
862 | : APInt(1, 1); |
863 | return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, |
864 | Depth); |
865 | } |
866 | |
867 | SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( |
868 | SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, |
869 | unsigned Depth) const { |
870 | APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits()); |
871 | return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, |
872 | Depth); |
873 | } |
874 | |
875 | /// Look at Op. At this point, we know that only the OriginalDemandedBits of the |
876 | /// result of Op are ever used downstream. If we can use this information to |
877 | /// simplify Op, create a new simplified DAG node and return true, returning the |
878 | /// original and new nodes in Old and New. Otherwise, analyze the expression and |
879 | /// return a mask of Known bits for the expression (used to simplify the |
880 | /// caller). The Known bits may only be accurate for those bits in the |
881 | /// OriginalDemandedBits and OriginalDemandedElts. |
882 | bool TargetLowering::SimplifyDemandedBits( |
883 | SDValue Op, const APInt &OriginalDemandedBits, |
884 | const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, |
885 | unsigned Depth, bool AssumeSingleUse) const { |
886 | unsigned BitWidth = OriginalDemandedBits.getBitWidth(); |
887 | assert(Op.getScalarValueSizeInBits() == BitWidth && |
888 |        "Mask size mismatches value type size!"); |
889 | |
890 | // Don't know anything. |
891 | Known = KnownBits(BitWidth); |
892 | |
893 | // TODO: We can probably do more work on calculating the known bits and |
894 | // simplifying the operations for scalable vectors, but for now we just |
895 | // bail out. |
896 | if (Op.getValueType().isScalableVector()) |
897 | return false; |
898 | |
899 | unsigned NumElts = OriginalDemandedElts.getBitWidth(); |
900 | assert((!Op.getValueType().isVector() || |
901 |         NumElts == Op.getValueType().getVectorNumElements()) && |
902 |        "Unexpected vector size"); |
903 | |
904 | APInt DemandedBits = OriginalDemandedBits; |
905 | APInt DemandedElts = OriginalDemandedElts; |
906 | SDLoc dl(Op); |
907 | auto &DL = TLO.DAG.getDataLayout(); |
908 | |
909 | // Undef operand. |
910 | if (Op.isUndef()) |
911 | return false; |
912 | |
913 | if (Op.getOpcode() == ISD::Constant) { |
914 | // We know all of the bits for a constant! |
915 | Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); |
916 | Known.Zero = ~Known.One; |
917 | return false; |
918 | } |
919 | |
920 | if (Op.getOpcode() == ISD::ConstantFP) { |
921 | // We know all of the bits for a floating point constant! |
922 | Known.One = cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt(); |
923 | Known.Zero = ~Known.One; |
924 | return false; |
925 | } |
926 | |
927 | // Other users may use these bits. |
928 | EVT VT = Op.getValueType(); |
929 | if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { |
930 | if (Depth != 0) { |
931 | // If not at the root, just compute the Known bits to |
932 | // simplify things downstream. |
933 | Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); |
934 | return false; |
935 | } |
936 | // If this is the root being simplified, allow it to have multiple uses, |
937 | // just set the DemandedBits/Elts to all bits. |
938 | DemandedBits = APInt::getAllOnesValue(BitWidth); |
939 | DemandedElts = APInt::getAllOnesValue(NumElts); |
940 | } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { |
941 | // Not demanding any bits/elts from Op. |
942 | return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); |
943 | } else if (Depth >= SelectionDAG::MaxRecursionDepth) { |
944 | // Limit search depth. |
945 | return false; |
946 | } |
947 | |
948 | KnownBits Known2; |
949 | switch (Op.getOpcode()) { |
950 | case ISD::TargetConstant: |
951 | llvm_unreachable("Can't simplify this node")::llvm::llvm_unreachable_internal("Can't simplify this node", "/build/llvm-toolchain-snapshot-12~++20210114111115+2b1e25befefc/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp" , 951); |
952 | case ISD::SCALAR_TO_VECTOR: { |
953 | if (!DemandedElts[0]) |
954 | return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); |
955 | |
956 | KnownBits SrcKnown; |
957 | SDValue Src = Op.getOperand(0); |
958 | unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); |
959 | APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth); |
960 | if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1)) |
961 | return true; |
962 | |
963 | // Upper elements are undef, so only get the knownbits if we just demand |
964 | // the bottom element. |
965 | if (DemandedElts == 1) |
966 | Known = SrcKnown.anyextOrTrunc(BitWidth); |
967 | break; |
968 | } |
969 | case ISD::BUILD_VECTOR: |
970 | // Collect the known bits that are shared by every demanded element. |
971 | // TODO: Call SimplifyDemandedBits for non-constant demanded elements. |
972 | Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); |
973 | return false; // Don't fall through, will infinitely loop. |
974 | case ISD::LOAD: { |
975 | LoadSDNode *LD = cast<LoadSDNode>(Op); |
976 | if (getTargetConstantFromLoad(LD)) { |
977 | Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); |
978 | return false; // Don't fall through, will infinitely loop. |
979 | } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { |
980 | // If this is a ZEXTLoad and we are looking at the loaded value. |
981 | EVT MemVT = LD->getMemoryVT(); |
982 | unsigned MemBits = MemVT.getScalarSizeInBits(); |
983 | Known.Zero.setBitsFrom(MemBits); |
984 | return false; // Don't fall through, will infinitely loop. |
985 | } |
986 | break; |
987 | } |
988 | case ISD::INSERT_VECTOR_ELT: { |
989 | SDValue Vec = Op.getOperand(0); |
990 | SDValue Scl = Op.getOperand(1); |
991 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
992 | EVT VecVT = Vec.getValueType(); |
993 | |
994 | // If index isn't constant, assume we need all vector elements AND the |
995 | // inserted element. |
996 | APInt DemandedVecElts(DemandedElts); |
997 | if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) { |
998 | unsigned Idx = CIdx->getZExtValue(); |
999 | DemandedVecElts.clearBit(Idx); |
1000 | |
1001 | // Inserted element is not required. |
1002 | if (!DemandedElts[Idx]) |
1003 | return TLO.CombineTo(Op, Vec); |
1004 | } |
1005 | |
1006 | KnownBits KnownScl; |
1007 | unsigned NumSclBits = Scl.getScalarValueSizeInBits(); |
1008 | APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits); |
1009 | if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1)) |
1010 | return true; |
1011 | |
1012 | Known = KnownScl.anyextOrTrunc(BitWidth); |
1013 | |
1014 | KnownBits KnownVec; |
1015 | if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO, |
1016 | Depth + 1)) |
1017 | return true; |
1018 | |
1019 | if (!!DemandedVecElts) { |
1020 | Known.One &= KnownVec.One; |
1021 | Known.Zero &= KnownVec.Zero; |
1022 | } |
1023 | |
1024 | return false; |
1025 | } |
1026 | case ISD::INSERT_SUBVECTOR: { |
1027 | // Demand any elements from the subvector and the remainder from the src it's |
1028 | // inserted into. |
1029 | SDValue Src = Op.getOperand(0); |
1030 | SDValue Sub = Op.getOperand(1); |
1031 | uint64_t Idx = Op.getConstantOperandVal(2); |
1032 | unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); |
1033 | APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); |
1034 | APInt DemandedSrcElts = DemandedElts; |
1035 | DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); |
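| // DemandedSubElts covers the inserted lanes; DemandedSrcElts is the original |
| // demand with those lanes cleared out. |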
1036 | |
1037 | KnownBits KnownSub, KnownSrc; |
1038 | if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO, |
1039 | Depth + 1)) |
1040 | return true; |
1041 | if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO, |
1042 | Depth + 1)) |
1043 | return true; |
1044 | |
1045 | Known.Zero.setAllBits(); |
1046 | Known.One.setAllBits(); |
1047 | if (!!DemandedSubElts) { |
1048 | Known.One &= KnownSub.One; |
1049 | Known.Zero &= KnownSub.Zero; |
1050 | } |
1051 | if (!!DemandedSrcElts) { |
1052 | Known.One &= KnownSrc.One; |
1053 | Known.Zero &= KnownSrc.Zero; |
1054 | } |
1055 | |
1056 | // Attempt to avoid multi-use src if we don't need anything from it. |
1057 | if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() || |
1058 | !DemandedSrcElts.isAllOnesValue()) { |
1059 | SDValue NewSub = SimplifyMultipleUseDemandedBits( |
1060 | Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1); |
1061 | SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
1062 | Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1); |
1063 | if (NewSub || NewSrc) { |
1064 | NewSub = NewSub ? NewSub : Sub; |
1065 | NewSrc = NewSrc ? NewSrc : Src; |
1066 | SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub, |
1067 | Op.getOperand(2)); |
1068 | return TLO.CombineTo(Op, NewOp); |
1069 | } |
1070 | } |
1071 | break; |
1072 | } |
1073 | case ISD::EXTRACT_SUBVECTOR: { |
1074 | // Offset the demanded elts by the subvector index. |
1075 | SDValue Src = Op.getOperand(0); |
1076 | if (Src.getValueType().isScalableVector()) |
1077 | break; |
1078 | uint64_t Idx = Op.getConstantOperandVal(1); |
1079 | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
1080 | APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); |
1081 | |
1082 | if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO, |
1083 | Depth + 1)) |
1084 | return true; |
1085 | |
1086 | // Attempt to avoid multi-use src if we don't need anything from it. |
1087 | if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) { |
1088 | SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( |
1089 | Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1); |
1090 | if (DemandedSrc) { |
1091 | SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, |
1092 | Op.getOperand(1)); |
1093 | return TLO.CombineTo(Op, NewOp); |
1094 | } |
1095 | } |
1096 | break; |
1097 | } |
1098 | case ISD::CONCAT_VECTORS: { |
1099 | Known.Zero.setAllBits(); |
1100 | Known.One.setAllBits(); |
1101 | EVT SubVT = Op.getOperand(0).getValueType(); |
1102 | unsigned NumSubVecs = Op.getNumOperands(); |
1103 | unsigned NumSubElts = SubVT.getVectorNumElements(); |
1104 | for (unsigned i = 0; i != NumSubVecs; ++i) { |
1105 | APInt DemandedSubElts = |
1106 | DemandedElts.extractBits(NumSubElts, i * NumSubElts); |
1107 | if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts, |
1108 | Known2, TLO, Depth + 1)) |
1109 | return true; |
1110 | // Known bits are shared by every demanded subvector element. |
1111 | if (!!DemandedSubElts) { |
1112 | Known.One &= Known2.One; |
1113 | Known.Zero &= Known2.Zero; |
1114 | } |
1115 | } |
1116 | break; |
1117 | } |
1118 | case ISD::VECTOR_SHUFFLE: { |
1119 | ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); |
1120 | |
1121 | // Collect demanded elements from shuffle operands. |
1122 | APInt DemandedLHS(NumElts, 0); |
1123 | APInt DemandedRHS(NumElts, 0); |
1124 | for (unsigned i = 0; i != NumElts; ++i) { |
1125 | if (!DemandedElts[i]) |
1126 | continue; |
1127 | int M = ShuffleMask[i]; |
1128 | if (M < 0) { |
1129 | // For UNDEF elements, we don't know anything about the common state of |
1130 | // the shuffle result. |
1131 | DemandedLHS.clearAllBits(); |
1132 | DemandedRHS.clearAllBits(); |
1133 | break; |
1134 | } |
1135 | assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range"); |
1136 | if (M < (int)NumElts) |
1137 | DemandedLHS.setBit(M); |
1138 | else |
1139 | DemandedRHS.setBit(M - NumElts); |
1140 | } |
1141 | |
1142 | if (!!DemandedLHS || !!DemandedRHS) { |
1143 | SDValue Op0 = Op.getOperand(0); |
1144 | SDValue Op1 = Op.getOperand(1); |
1145 | |
1146 | Known.Zero.setAllBits(); |
1147 | Known.One.setAllBits(); |
1148 | if (!!DemandedLHS) { |
1149 | if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO, |
1150 | Depth + 1)) |
1151 | return true; |
1152 | Known.One &= Known2.One; |
1153 | Known.Zero &= Known2.Zero; |
1154 | } |
1155 | if (!!DemandedRHS) { |
1156 | if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO, |
1157 | Depth + 1)) |
1158 | return true; |
1159 | Known.One &= Known2.One; |
1160 | Known.Zero &= Known2.Zero; |
1161 | } |
1162 | |
1163 | // Attempt to avoid multi-use ops if we don't need anything from them. |
1164 | SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( |
1165 | Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1); |
1166 | SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( |
1167 | Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1); |
1168 | if (DemandedOp0 || DemandedOp1) { |
1169 | Op0 = DemandedOp0 ? DemandedOp0 : Op0; |
1170 | Op1 = DemandedOp1 ? DemandedOp1 : Op1; |
1171 | SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask); |
1172 | return TLO.CombineTo(Op, NewOp); |
1173 | } |
1174 | } |
1175 | break; |
1176 | } |
1177 | case ISD::AND: { |
1178 | SDValue Op0 = Op.getOperand(0); |
1179 | SDValue Op1 = Op.getOperand(1); |
1180 | |
1181 | // If the RHS is a constant, check to see if the LHS would be zero without |
1182 | // using the bits from the RHS. Below, we use knowledge about the RHS to |
1183 | // simplify the LHS; here we're using information from the LHS to simplify |
1184 | // the RHS. |
1185 | if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) { |
1186 | // Do not increment Depth here; that can cause an infinite loop. |
1187 | KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth); |
1188 | // If the LHS already has zeros where RHSC does, this 'and' is dead. |
1189 | if ((LHSKnown.Zero & DemandedBits) == |
1190 | (~RHSC->getAPIntValue() & DemandedBits)) |
1191 | return TLO.CombineTo(Op, Op0); |
1192 | |
1193 | // If any of the set bits in the RHS are known zero on the LHS, shrink |
1194 | // the constant. |
1195 | if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, |
1196 | DemandedElts, TLO)) |
1197 | return true; |
1198 | |
1199 | // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its |
1200 | // constant, but if this 'and' is only clearing bits that were just set by |
1201 | // the xor, then this 'and' can be eliminated by shrinking the mask of |
1202 | // the xor. For example, for a 32-bit X: |
1203 | // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 |
1204 | if (isBitwiseNot(Op0) && Op0.hasOneUse() && |
1205 | LHSKnown.One == ~RHSC->getAPIntValue()) { |
1206 | SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1); |
1207 | return TLO.CombineTo(Op, Xor); |
1208 | } |
1209 | } |
1210 | |
1211 | if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, |
1212 | Depth + 1)) |
1213 | return true; |
1214 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
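| // Bits known zero in the RHS need not be demanded from the LHS: the 'and' |
| // clears them regardless. |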
1215 | if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, |
1216 | Known2, TLO, Depth + 1)) |
1217 | return true; |
1218 | assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); |
1219 | |
1220 | // Attempt to avoid multi-use ops if we don't need anything from them. |
1221 | if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { |
1222 | SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( |
1223 | Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); |
1224 | SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( |
1225 | Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); |
1226 | if (DemandedOp0 || DemandedOp1) { |
1227 | Op0 = DemandedOp0 ? DemandedOp0 : Op0; |
1228 | Op1 = DemandedOp1 ? DemandedOp1 : Op1; |
1229 | SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); |
1230 | return TLO.CombineTo(Op, NewOp); |
1231 | } |
1232 | } |
1233 | |
1234 | // If all of the demanded bits are known one on one side, return the other. |
1235 | // These bits cannot contribute to the result of the 'and'. |
1236 | if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) |
1237 | return TLO.CombineTo(Op, Op0); |
1238 | if (DemandedBits.isSubsetOf(Known.Zero | Known2.One)) |
1239 | return TLO.CombineTo(Op, Op1); |
1240 | // If all of the demanded bits in the inputs are known zeros, return zero. |
1241 | if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) |
1242 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); |
1243 | // If the RHS is a constant, see if we can simplify it. |
1244 | if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts, |
1245 | TLO)) |
1246 | return true; |
1247 | // If the operation can be done in a smaller type, do so. |
1248 | if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) |
1249 | return true; |
1250 | |
1251 | Known &= Known2; |
1252 | break; |
1253 | } |
1254 | case ISD::OR: { |
1255 | SDValue Op0 = Op.getOperand(0); |
1256 | SDValue Op1 = Op.getOperand(1); |
1257 | |
1258 | if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, |
1259 | Depth + 1)) |
1260 | return true; |
1261 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1262 | if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, |
1263 | Known2, TLO, Depth + 1)) |
1264 | return true; |
1265 | assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1266 | |
1267 | // Attempt to avoid multi-use ops if we don't need anything from them. |
1268 | if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { |
1269 | SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( |
1270 | Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); |
1271 | SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( |
1272 | Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); |
1273 | if (DemandedOp0 || DemandedOp1) { |
1274 | Op0 = DemandedOp0 ? DemandedOp0 : Op0; |
1275 | Op1 = DemandedOp1 ? DemandedOp1 : Op1; |
1276 | SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); |
1277 | return TLO.CombineTo(Op, NewOp); |
1278 | } |
1279 | } |
1280 | |
1281 | // If all of the demanded bits are known zero on one side, return the other. |
1282 | // These bits cannot contribute to the result of the 'or'. |
1283 | if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) |
1284 | return TLO.CombineTo(Op, Op0); |
1285 | if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) |
1286 | return TLO.CombineTo(Op, Op1); |
1287 | // If the RHS is a constant, see if we can simplify it. |
1288 | if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) |
1289 | return true; |
1290 | // If the operation can be done in a smaller type, do so. |
1291 | if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) |
1292 | return true; |
1293 | |
1294 | Known |= Known2; |
1295 | break; |
1296 | } |
1297 | case ISD::XOR: { |
1298 | SDValue Op0 = Op.getOperand(0); |
1299 | SDValue Op1 = Op.getOperand(1); |
1300 | |
1301 | if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, |
1302 | Depth + 1)) |
1303 | return true; |
1304 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1305 | if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, |
1306 | Depth + 1)) |
1307 | return true; |
1308 | assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1309 | |
1310 | // Attempt to avoid multi-use ops if we don't need anything from them. |
1311 | if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { |
1312 | SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( |
1313 | Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); |
1314 | SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( |
1315 | Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); |
1316 | if (DemandedOp0 || DemandedOp1) { |
1317 | Op0 = DemandedOp0 ? DemandedOp0 : Op0; |
1318 | Op1 = DemandedOp1 ? DemandedOp1 : Op1; |
1319 | SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); |
1320 | return TLO.CombineTo(Op, NewOp); |
1321 | } |
1322 | } |
1323 | |
1324 | // If all of the demanded bits are known zero on one side, return the other. |
1325 | // These bits cannot contribute to the result of the 'xor'. |
1326 | if (DemandedBits.isSubsetOf(Known.Zero)) |
1327 | return TLO.CombineTo(Op, Op0); |
1328 | if (DemandedBits.isSubsetOf(Known2.Zero)) |
1329 | return TLO.CombineTo(Op, Op1); |
1330 | // If the operation can be done in a smaller type, do so. |
1331 | if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) |
1332 | return true; |
1333 | |
1334 | // If all of the unknown bits are known to be zero on one side or the other,
1335 | // turn this into an *inclusive* or. |
1336 | // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 |
1337 | if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) |
1338 | return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); |
1339 | |
1340 | ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1341 | if (C) { |
1342 | // If one side is a constant, and all of the set bits in the constant are |
1343 | // also known set on the other side, turn this into an AND, as we know |
1344 | // the bits will be cleared. |
1345 | // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 |
1346 | // NB: it is okay if more bits are known than are requested |
1347 | if (C->getAPIntValue() == Known2.One) { |
1348 | SDValue ANDC = |
1349 | TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT); |
1350 | return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC)); |
1351 | } |
1352 | |
1353 | // If the RHS is a constant, see if we can change it. Don't alter a -1 |
1354 | // constant because that's a 'not' op, and that is better for combining |
1355 | // and codegen. |
1356 | if (!C->isAllOnesValue() && |
1357 | DemandedBits.isSubsetOf(C->getAPIntValue())) { |
1358 | // We're flipping all demanded bits. Flip the undemanded bits too. |
1359 | SDValue New = TLO.DAG.getNOT(dl, Op0, VT); |
1360 | return TLO.CombineTo(Op, New); |
1361 | } |
1362 | } |
1363 | |
1364 | // If we can't turn this into a 'not', try to shrink the constant. |
1365 | if (!C || !C->isAllOnesValue()) |
1366 | if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) |
1367 | return true; |
1368 | |
1369 | Known ^= Known2; |
1370 | break; |
1371 | } |
1372 | case ISD::SELECT: |
1373 | if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO, |
1374 | Depth + 1)) |
1375 | return true; |
1376 | if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO, |
1377 | Depth + 1)) |
1378 | return true; |
1379 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1380 | assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1381 | |
1382 | // If the operands are constants, see if we can simplify them. |
1383 | if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) |
1384 | return true; |
1385 | |
1386 | // Only known if known in both the LHS and RHS. |
1387 | Known.One &= Known2.One; |
1388 | Known.Zero &= Known2.Zero; |
1389 | break; |
1390 | case ISD::SELECT_CC: |
1391 | if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO, |
1392 | Depth + 1)) |
1393 | return true; |
1394 | if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO, |
1395 | Depth + 1)) |
1396 | return true; |
1397 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1398 | assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1399 | |
1400 | // If the operands are constants, see if we can simplify them. |
1401 | if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) |
1402 | return true; |
1403 | |
1404 | // Only known if known in both the LHS and RHS. |
1405 | Known.One &= Known2.One; |
1406 | Known.Zero &= Known2.Zero; |
1407 | break; |
1408 | case ISD::SETCC: { |
1409 | SDValue Op0 = Op.getOperand(0); |
1410 | SDValue Op1 = Op.getOperand(1); |
1411 | ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); |
1412 | // If (1) we only need the sign-bit, (2) the setcc operands are the same |
1413 | // width as the setcc result, and (3) the result of a setcc conforms to 0 or |
1414 | // -1, we may be able to bypass the setcc. |
1415 | if (DemandedBits.isSignMask() && |
1416 | Op0.getScalarValueSizeInBits() == BitWidth && |
1417 | getBooleanContents(Op0.getValueType()) == |
1418 | BooleanContent::ZeroOrNegativeOneBooleanContent) { |
1419 | // If we're testing X < 0, then this compare isn't needed - just use X! |
1420 | // FIXME: We're limiting to integer types here, but this should also work |
1421 | // if we don't care about FP signed-zero. The use of SETLT with FP means |
1422 | // that we don't care about NaNs. |
1423 | if (CC == ISD::SETLT && Op1.getValueType().isInteger() && |
1424 | (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) |
1425 | return TLO.CombineTo(Op, Op0); |
1426 | |
1427 | // TODO: Should we check for other forms of sign-bit comparisons? |
1428 | // Examples: X <= -1, X >= 0 |
1429 | } |
1430 | if (getBooleanContents(Op0.getValueType()) == |
1431 | TargetLowering::ZeroOrOneBooleanContent && |
1432 | BitWidth > 1) |
1433 | Known.Zero.setBitsFrom(1); |
1434 | break; |
1435 | } |
1436 | case ISD::SHL: { |
1437 | SDValue Op0 = Op.getOperand(0); |
1438 | SDValue Op1 = Op.getOperand(1); |
1439 | EVT ShiftVT = Op1.getValueType(); |
1440 | |
1441 | if (const APInt *SA = |
1442 | TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { |
1443 | unsigned ShAmt = SA->getZExtValue(); |
1444 | if (ShAmt == 0) |
1445 | return TLO.CombineTo(Op, Op0); |
1446 | |
1447 | // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a |
1448 | // single shift. We can do this if the bottom bits (which are shifted |
1449 | // out) are never demanded. |
1450 | // TODO - support non-uniform vector amounts. |
1451 | if (Op0.getOpcode() == ISD::SRL) { |
1452 | if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { |
1453 | if (const APInt *SA2 = |
1454 | TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { |
1455 | unsigned C1 = SA2->getZExtValue(); |
1456 | unsigned Opc = ISD::SHL; |
1457 | int Diff = ShAmt - C1; |
1458 | if (Diff < 0) { |
1459 | Diff = -Diff; |
1460 | Opc = ISD::SRL; |
1461 | } |
1462 | SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); |
1463 | return TLO.CombineTo( |
1464 | Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); |
1465 | } |
1466 | } |
1467 | } |
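     | // [Editorial note, not in the LLVM source: a worked example of the fold
     | // above for a 32-bit X. With the low 5 result bits undemanded,
     | // ((X >>u 2) << 5) gives Diff = 5 - 2 = 3 and folds to (X << 3); with
     | // the low 2 result bits undemanded, ((X >>u 5) << 2) gives Diff = -3
     | // and folds to (X >>u 3).]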
1468 | |
1469 | // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits |
1470 | // are not demanded. This will likely allow the anyext to be folded away. |
1471 | // TODO - support non-uniform vector amounts. |
1472 | if (Op0.getOpcode() == ISD::ANY_EXTEND) { |
1473 | SDValue InnerOp = Op0.getOperand(0); |
1474 | EVT InnerVT = InnerOp.getValueType(); |
1475 | unsigned InnerBits = InnerVT.getScalarSizeInBits(); |
1476 | if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits && |
1477 | isTypeDesirableForOp(ISD::SHL, InnerVT)) { |
1478 | EVT ShTy = getShiftAmountTy(InnerVT, DL); |
1479 | if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) |
1480 | ShTy = InnerVT; |
1481 | SDValue NarrowShl = |
1482 | TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, |
1483 | TLO.DAG.getConstant(ShAmt, dl, ShTy)); |
1484 | return TLO.CombineTo( |
1485 | Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); |
1486 | } |
1487 | |
1488 | // Repeat the SHL optimization above in cases where an extension |
1489 | // intervenes: (shl (anyext (shr x, c1)), c2) to |
1490 | // (shl (anyext x), c2-c1). This requires that the bottom c1 bits |
1491 | // aren't demanded (as above) and that the shifted upper c1 bits of |
1492 | // x aren't demanded. |
1493 | // TODO - support non-uniform vector amounts. |
1494 | if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && |
1495 | InnerOp.hasOneUse()) { |
1496 | if (const APInt *SA2 = |
1497 | TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) { |
1498 | unsigned InnerShAmt = SA2->getZExtValue(); |
1499 | if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && |
1500 | DemandedBits.getActiveBits() <= |
1501 | (InnerBits - InnerShAmt + ShAmt) && |
1502 | DemandedBits.countTrailingZeros() >= ShAmt) { |
1503 | SDValue NewSA = |
1504 | TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT); |
1505 | SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, |
1506 | InnerOp.getOperand(0)); |
1507 | return TLO.CombineTo( |
1508 | Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA)); |
1509 | } |
1510 | } |
1511 | } |
1512 | } |
1513 | |
1514 | APInt InDemandedMask = DemandedBits.lshr(ShAmt); |
1515 | if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, |
1516 | Depth + 1)) |
1517 | return true; |
1518 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1519 | Known.Zero <<= ShAmt; |
1520 | Known.One <<= ShAmt; |
1521 | // low bits known zero. |
1522 | Known.Zero.setLowBits(ShAmt); |
1523 | |
1524 | // Try shrinking the operation as long as the shift amount will still be |
1525 | // in range. |
1526 | if ((ShAmt < DemandedBits.getActiveBits()) && |
1527 | ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) |
1528 | return true; |
1529 | } |
1530 | |
1531 | // If we are only demanding sign bits then we can use the shift source |
1532 | // directly. |
1533 | if (const APInt *MaxSA = |
1534 | TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) { |
1535 | unsigned ShAmt = MaxSA->getZExtValue(); |
1536 | unsigned NumSignBits = |
1537 | TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); |
1538 | unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); |
1539 | if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits)) |
1540 | return TLO.CombineTo(Op, Op0); |
1541 | } |
1542 | break; |
1543 | } |
1544 | case ISD::SRL: { |
1545 | SDValue Op0 = Op.getOperand(0); |
1546 | SDValue Op1 = Op.getOperand(1); |
1547 | EVT ShiftVT = Op1.getValueType(); |
1548 | |
1549 | if (const APInt *SA = |
1550 | TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { |
1551 | unsigned ShAmt = SA->getZExtValue(); |
1552 | if (ShAmt == 0) |
1553 | return TLO.CombineTo(Op, Op0); |
1554 | |
1555 | // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a |
1556 | // single shift. We can do this if the top bits (which are shifted out) |
1557 | // are never demanded. |
1558 | // TODO - support non-uniform vector amounts. |
1559 | if (Op0.getOpcode() == ISD::SHL) { |
1560 | if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) { |
1561 | if (const APInt *SA2 = |
1562 | TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { |
1563 | unsigned C1 = SA2->getZExtValue(); |
1564 | unsigned Opc = ISD::SRL; |
1565 | int Diff = ShAmt - C1; |
1566 | if (Diff < 0) { |
1567 | Diff = -Diff; |
1568 | Opc = ISD::SHL; |
1569 | } |
1570 | SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); |
1571 | return TLO.CombineTo( |
1572 | Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); |
1573 | } |
1574 | } |
1575 | } |
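     | // [Editorial note, not in the LLVM source: the mirror image of the SHL
     | // case above. For a 32-bit X whose top 6 result bits are undemanded,
     | // ((X << 4) >>u 6) gives Diff = 6 - 4 = 2 and folds to (X >>u 2).]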
1576 | |
1577 | APInt InDemandedMask = (DemandedBits << ShAmt); |
1578 | |
1579 | // If the shift is exact, then it does demand the low bits (and knows that |
1580 | // they are zero). |
1581 | if (Op->getFlags().hasExact()) |
1582 | InDemandedMask.setLowBits(ShAmt); |
1583 | |
1584 | // Compute the new bits that are at the top now. |
1585 | if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, |
1586 | Depth + 1)) |
1587 | return true; |
1588 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1589 | Known.Zero.lshrInPlace(ShAmt); |
1590 | Known.One.lshrInPlace(ShAmt); |
1591 | // High bits known zero. |
1592 | Known.Zero.setHighBits(ShAmt); |
1593 | } |
1594 | break; |
1595 | } |
1596 | case ISD::SRA: { |
1597 | SDValue Op0 = Op.getOperand(0); |
1598 | SDValue Op1 = Op.getOperand(1); |
1599 | EVT ShiftVT = Op1.getValueType(); |
1600 | |
1601 | // If we only want bits that already match the signbit then we don't need |
1602 | // to shift. |
1603 | unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); |
1604 | if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >= |
1605 | NumHiDemandedBits) |
1606 | return TLO.CombineTo(Op, Op0); |
1607 | |
1608 | // If this is an arithmetic shift right and only the low-bit is set, we can |
1609 | // always convert this into a logical shr, even if the shift amount is |
1610 | // variable. The low bit of the shift cannot be an input sign bit unless |
1611 | // the shift amount is >= the size of the datatype, which is undefined. |
1612 | if (DemandedBits.isOneValue()) |
1613 | return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); |
1614 | |
1615 | if (const APInt *SA = |
1616 | TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { |
1617 | unsigned ShAmt = SA->getZExtValue(); |
1618 | if (ShAmt == 0) |
1619 | return TLO.CombineTo(Op, Op0); |
1620 | |
1621 | APInt InDemandedMask = (DemandedBits << ShAmt); |
1622 | |
1623 | // If the shift is exact, then it does demand the low bits (and knows that |
1624 | // they are zero). |
1625 | if (Op->getFlags().hasExact()) |
1626 | InDemandedMask.setLowBits(ShAmt); |
1627 | |
1628 | // If any of the demanded bits are produced by the sign extension, we also |
1629 | // demand the input sign bit. |
1630 | if (DemandedBits.countLeadingZeros() < ShAmt) |
1631 | InDemandedMask.setSignBit(); |
1632 | |
1633 | if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, |
1634 | Depth + 1)) |
1635 | return true; |
1636 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1637 | Known.Zero.lshrInPlace(ShAmt); |
1638 | Known.One.lshrInPlace(ShAmt); |
1639 | |
1640 | // If the input sign bit is known to be zero, or if none of the top bits |
1641 | // are demanded, turn this into an unsigned shift right. |
1642 | if (Known.Zero[BitWidth - ShAmt - 1] || |
1643 | DemandedBits.countLeadingZeros() >= ShAmt) { |
1644 | SDNodeFlags Flags; |
1645 | Flags.setExact(Op->getFlags().hasExact()); |
1646 | return TLO.CombineTo( |
1647 | Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags)); |
1648 | } |
1649 | |
1650 | int Log2 = DemandedBits.exactLogBase2(); |
1651 | if (Log2 >= 0) { |
1652 | // The bit must come from the sign. |
1653 | SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT); |
1654 | return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA)); |
1655 | } |
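     | // [Editorial note, not in the LLVM source: e.g. for i32 with ShAmt = 28
     | // and DemandedBits = 0x10 (Log2 = 4), the demanded bit lies in the
     | // sign-extended region, so (sra X, 28) becomes (srl X, 31 - 4) =
     | // (srl X, 27), which moves the sign bit to position 4.]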
1656 | |
1657 | if (Known.One[BitWidth - ShAmt - 1]) |
1658 | // New bits are known one. |
1659 | Known.One.setHighBits(ShAmt); |
1660 | |
1661 | // Attempt to avoid multi-use ops if we don't need anything from them. |
1662 | if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { |
1663 | SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( |
1664 | Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); |
1665 | if (DemandedOp0) { |
1666 | SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1); |
1667 | return TLO.CombineTo(Op, NewOp); |
1668 | } |
1669 | } |
1670 | } |
1671 | break; |
1672 | } |
1673 | case ISD::FSHL: |
1674 | case ISD::FSHR: { |
1675 | SDValue Op0 = Op.getOperand(0); |
1676 | SDValue Op1 = Op.getOperand(1); |
1677 | SDValue Op2 = Op.getOperand(2); |
1678 | bool IsFSHL = (Op.getOpcode() == ISD::FSHL); |
1679 | |
1680 | if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) { |
1681 | unsigned Amt = SA->getAPIntValue().urem(BitWidth); |
1682 | |
1683 | // For fshl, 0-shift returns the 1st arg. |
1684 | // For fshr, 0-shift returns the 2nd arg. |
1685 | if (Amt == 0) { |
1686 | if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts, |
1687 | Known, TLO, Depth + 1)) |
1688 | return true; |
1689 | break; |
1690 | } |
1691 | |
1692 | // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt)) |
1693 | // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt) |
1694 | APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt)); |
1695 | APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt); |
1696 | if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO, |
1697 | Depth + 1)) |
1698 | return true; |
1699 | if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO, |
1700 | Depth + 1)) |
1701 | return true; |
1702 | |
1703 | Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt)); |
1704 | Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt)); |
1705 | Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); |
1706 | Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); |
1707 | Known.One |= Known2.One; |
1708 | Known.Zero |= Known2.Zero; |
1709 | } |
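     | // [Editorial note, not in the LLVM source: e.g. (fshl i8 Op0, Op1, 3)
     | // computes (Op0 << 3) | (Op1 >> 5). If only bit 7 is demanded, then
     | // Demanded0 = 0x80 >>u 3 = 0x10 (bit 4 of Op0) and Demanded1 =
     | // 0x80 << 5 = 0, so nothing is demanded from Op1 at all.]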
1710 | |
1711 | // For pow-2 bitwidths we only demand the bottom modulo amt bits. |
1712 | if (isPowerOf2_32(BitWidth)) { |
1713 | APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1); |
1714 | if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, |
1715 | Known2, TLO, Depth + 1)) |
1716 | return true; |
1717 | } |
1718 | break; |
1719 | } |
1720 | case ISD::ROTL: |
1721 | case ISD::ROTR: { |
1722 | SDValue Op0 = Op.getOperand(0); |
1723 | SDValue Op1 = Op.getOperand(1); |
1724 | |
1725 | // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
1726 | if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1)) |
1727 | return TLO.CombineTo(Op, Op0); |
1728 | |
1729 | // For pow-2 bitwidths we only demand the bottom modulo amt bits. |
1730 | if (isPowerOf2_32(BitWidth)) { |
1731 | APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1); |
1732 | if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO, |
1733 | Depth + 1)) |
1734 | return true; |
1735 | } |
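     | // [Editorial note, not in the LLVM source: e.g. for a 32-bit rotate,
     | // rot(X, Amt) == rot(X, Amt & 31), so only the low 5 bits of the amount
     | // are demanded and any upper bits of Op1 may be simplified away.]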
1736 | break; |
1737 | } |
1738 | case ISD::BITREVERSE: { |
1739 | SDValue Src = Op.getOperand(0); |
1740 | APInt DemandedSrcBits = DemandedBits.reverseBits(); |
1741 | if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, |
1742 | Depth + 1)) |
1743 | return true; |
1744 | Known.One = Known2.One.reverseBits(); |
1745 | Known.Zero = Known2.Zero.reverseBits(); |
1746 | break; |
1747 | } |
1748 | case ISD::BSWAP: { |
1749 | SDValue Src = Op.getOperand(0); |
1750 | APInt DemandedSrcBits = DemandedBits.byteSwap(); |
1751 | if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, |
1752 | Depth + 1)) |
1753 | return true; |
1754 | Known.One = Known2.One.byteSwap(); |
1755 | Known.Zero = Known2.Zero.byteSwap(); |
1756 | break; |
1757 | } |
1758 | case ISD::CTPOP: { |
1759 | // If only 1 bit is demanded, replace with PARITY as long as we're before |
1760 | // op legalization. |
1761 | // FIXME: Limit to scalars for now. |
1762 | if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector()) |
1763 | return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, |
1764 | Op.getOperand(0))); |
1765 | |
1766 | Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); |
1767 | break; |
1768 | } |
1769 | case ISD::SIGN_EXTEND_INREG: { |
1770 | SDValue Op0 = Op.getOperand(0); |
1771 | EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); |
1772 | unsigned ExVTBits = ExVT.getScalarSizeInBits(); |
1773 | |
1774 | // If we only care about the highest bit, don't bother shifting right. |
1775 | if (DemandedBits.isSignMask()) { |
1776 | unsigned NumSignBits = |
1777 | TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); |
1778 | bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; |
1779 | // However if the input is already sign extended we expect the sign |
1780 | // extension to be dropped altogether later and do not simplify. |
1781 | if (!AlreadySignExtended) { |
1782 | // Compute the correct shift amount type, which must be getShiftAmountTy |
1783 | // for scalar types after legalization. |
1784 | EVT ShiftAmtTy = VT; |
1785 | if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) |
1786 | ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); |
1787 | |
1788 | SDValue ShiftAmt = |
1789 | TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); |
1790 | return TLO.CombineTo(Op, |
1791 | TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt)); |
1792 | } |
1793 | } |
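     | // [Editorial note, not in the LLVM source: e.g. for
     | // (sext_inreg X, i8) in i32 with only bit 31 demanded, the demanded bit
     | // is a copy of bit 7 of X, so (shl X, 32 - 8) = (shl X, 24) produces it
     | // directly and the sign extension disappears.]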
1794 | |
1795 | // If none of the extended bits are demanded, eliminate the sextinreg. |
1796 | if (DemandedBits.getActiveBits() <= ExVTBits) |
1797 | return TLO.CombineTo(Op, Op0); |
1798 | |
1799 | APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits); |
1800 | |
1801 | // Since the sign extended bits are demanded, we know that the sign |
1802 | // bit is demanded. |
1803 | InputDemandedBits.setBit(ExVTBits - 1); |
1804 | |
1805 | if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1)) |
1806 | return true; |
1807 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1808 | |
1809 | // If the sign bit of the input is known set or clear, then we know the |
1810 | // top bits of the result. |
1811 | |
1812 | // If the input sign bit is known zero, convert this into a zero extension. |
1813 | if (Known.Zero[ExVTBits - 1]) |
1814 | return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT)); |
1815 | |
1816 | APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits); |
1817 | if (Known.One[ExVTBits - 1]) { // Input sign bit known set |
1818 | Known.One.setBitsFrom(ExVTBits); |
1819 | Known.Zero &= Mask; |
1820 | } else { // Input sign bit unknown |
1821 | Known.Zero &= Mask; |
1822 | Known.One &= Mask; |
1823 | } |
1824 | break; |
1825 | } |
1826 | case ISD::BUILD_PAIR: { |
1827 | EVT HalfVT = Op.getOperand(0).getValueType(); |
1828 | unsigned HalfBitWidth = HalfVT.getScalarSizeInBits(); |
1829 | |
1830 | APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth); |
1831 | APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth); |
1832 | |
1833 | KnownBits KnownLo, KnownHi; |
1834 | |
1835 | if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1)) |
1836 | return true; |
1837 | |
1838 | if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1)) |
1839 | return true; |
1840 | |
1841 | Known.Zero = KnownLo.Zero.zext(BitWidth) | |
1842 | KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth); |
1843 | |
1844 | Known.One = KnownLo.One.zext(BitWidth) | |
1845 | KnownHi.One.zext(BitWidth).shl(HalfBitWidth); |
1846 | break; |
1847 | } |
1848 | case ISD::ZERO_EXTEND: |
1849 | case ISD::ZERO_EXTEND_VECTOR_INREG: { |
1850 | SDValue Src = Op.getOperand(0); |
1851 | EVT SrcVT = Src.getValueType(); |
1852 | unsigned InBits = SrcVT.getScalarSizeInBits(); |
1853 | unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; |
1854 | bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG; |
1855 | |
1856 | // If none of the top bits are demanded, convert this into an any_extend. |
1857 | if (DemandedBits.getActiveBits() <= InBits) { |
1858 | // If we only need the non-extended bits of the bottom element |
1859 | // then we can just bitcast to the result. |
1860 | if (IsVecInReg && DemandedElts == 1 && |
1861 | VT.getSizeInBits() == SrcVT.getSizeInBits() && |
1862 | TLO.DAG.getDataLayout().isLittleEndian()) |
1863 | return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); |
1864 | |
1865 | unsigned Opc = |
1866 | IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND; |
1867 | if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) |
1868 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); |
1869 | } |
1870 | |
1871 | APInt InDemandedBits = DemandedBits.trunc(InBits); |
1872 | APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); |
1873 | if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, |
1874 | Depth + 1)) |
1875 | return true; |
1876 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1877 | assert(Known.getBitWidth() == InBits && "Src width has changed?");
1878 | Known = Known.zext(BitWidth); |
1879 | |
1880 | // Attempt to avoid multi-use ops if we don't need anything from them. |
1881 | if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
1882 | Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1)) |
1883 | return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); |
1884 | break; |
1885 | } |
1886 | case ISD::SIGN_EXTEND: |
1887 | case ISD::SIGN_EXTEND_VECTOR_INREG: { |
1888 | SDValue Src = Op.getOperand(0); |
1889 | EVT SrcVT = Src.getValueType(); |
1890 | unsigned InBits = SrcVT.getScalarSizeInBits(); |
1891 | unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; |
1892 | bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG; |
1893 | |
1894 | // If none of the top bits are demanded, convert this into an any_extend. |
1895 | if (DemandedBits.getActiveBits() <= InBits) { |
1896 | // If we only need the non-extended bits of the bottom element |
1897 | // then we can just bitcast to the result. |
1898 | if (IsVecInReg && DemandedElts == 1 && |
1899 | VT.getSizeInBits() == SrcVT.getSizeInBits() && |
1900 | TLO.DAG.getDataLayout().isLittleEndian()) |
1901 | return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); |
1902 | |
1903 | unsigned Opc = |
1904 | IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND; |
1905 | if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) |
1906 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); |
1907 | } |
1908 | |
1909 | APInt InDemandedBits = DemandedBits.trunc(InBits); |
1910 | APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); |
1911 | |
1912 | // Since some of the sign extended bits are demanded, we know that the sign |
1913 | // bit is demanded. |
1914 | InDemandedBits.setBit(InBits - 1); |
1915 | |
1916 | if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, |
1917 | Depth + 1)) |
1918 | return true; |
1919 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1920 | assert(Known.getBitWidth() == InBits && "Src width has changed?");
1921 | |
1922 | // If the sign bit is known one, the top bits match. |
1923 | Known = Known.sext(BitWidth); |
1924 | |
1925 | // If the sign bit is known zero, convert this to a zero extend. |
1926 | if (Known.isNonNegative()) { |
1927 | unsigned Opc = |
1928 | IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND; |
1929 | if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) |
1930 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); |
1931 | } |
1932 | |
1933 | // Attempt to avoid multi-use ops if we don't need anything from them. |
1934 | if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
1935 | Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1)) |
1936 | return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); |
1937 | break; |
1938 | } |
1939 | case ISD::ANY_EXTEND: |
1940 | case ISD::ANY_EXTEND_VECTOR_INREG: { |
1941 | SDValue Src = Op.getOperand(0); |
1942 | EVT SrcVT = Src.getValueType(); |
1943 | unsigned InBits = SrcVT.getScalarSizeInBits(); |
1944 | unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; |
1945 | bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG; |
1946 | |
1947 | // If we only need the bottom element then we can just bitcast. |
1948 | // TODO: Handle ANY_EXTEND? |
1949 | if (IsVecInReg && DemandedElts == 1 && |
1950 | VT.getSizeInBits() == SrcVT.getSizeInBits() && |
1951 | TLO.DAG.getDataLayout().isLittleEndian()) |
1952 | return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); |
1953 | |
1954 | APInt InDemandedBits = DemandedBits.trunc(InBits); |
1955 | APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); |
1956 | if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, |
1957 | Depth + 1)) |
1958 | return true; |
1959 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1960 | assert(Known.getBitWidth() == InBits && "Src width has changed?");
1961 | Known = Known.anyext(BitWidth); |
1962 | |
1963 | // Attempt to avoid multi-use ops if we don't need anything from them. |
1964 | if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
1965 | Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1)) |
1966 | return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); |
1967 | break; |
1968 | } |
1969 | case ISD::TRUNCATE: { |
1970 | SDValue Src = Op.getOperand(0); |
1971 | |
1972 | // Simplify the input, using demanded bit information, and compute the known |
1973 | // zero/one bits live out. |
1974 | unsigned OperandBitWidth = Src.getScalarValueSizeInBits(); |
1975 | APInt TruncMask = DemandedBits.zext(OperandBitWidth); |
1976 | if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1)) |
1977 | return true; |
1978 | Known = Known.trunc(BitWidth); |
1979 | |
1980 | // Attempt to avoid multi-use ops if we don't need anything from them. |
1981 | if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
1982 | Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1)) |
1983 | return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc)); |
1984 | |
1985 | // If the input is only used by this truncate, see if we can shrink it based |
1986 | // on the known demanded bits. |
1987 | if (Src.getNode()->hasOneUse()) { |
1988 | switch (Src.getOpcode()) { |
1989 | default: |
1990 | break; |
1991 | case ISD::SRL: |
1992 | // Shrink SRL by a constant if none of the high bits shifted in are |
1993 | // demanded. |
1994 | if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT)) |
1995 | // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is |
1996 | // undesirable. |
1997 | break; |
1998 | |
1999 | SDValue ShAmt = Src.getOperand(1); |
2000 | auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt); |
2001 | if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) |
2002 | break; |
2003 | uint64_t ShVal = ShAmtC->getZExtValue(); |
2004 | |
2005 | APInt HighBits = |
2006 | APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); |
2007 | HighBits.lshrInPlace(ShVal); |
2008 | HighBits = HighBits.trunc(BitWidth); |
2009 | |
2010 | if (!(HighBits & DemandedBits)) { |
2011 | // None of the shifted in bits are needed. Add a truncate of the |
2012 | // shift input, then shift it. |
2013 | if (TLO.LegalTypes()) |
2014 | ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); |
2015 | SDValue NewTrunc = |
2016 | TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); |
2017 | return TLO.CombineTo( |
2018 | Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt)); |
2019 | } |
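     | // [Editorial note, not in the LLVM source: e.g. when truncating
     | // (srl (i64 X), 8) to i32, HighBits ends up as bits 24..31 of the i32,
     | // the positions filled from X's upper half; if none of those bits are
     | // demanded, the fold yields (srl (trunc X), 8).]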
2020 | break; |
2021 | } |
2022 | } |
2023 | |
2024 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2025 | break; |
2026 | } |
2027 | case ISD::AssertZext: { |
2028 | // AssertZext demands all of the high bits, plus any of the low bits |
2029 | // demanded by its users. |
2030 | EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); |
2031 | APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits()); |
2032 | if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known, |
2033 | TLO, Depth + 1)) |
2034 | return true; |
2035 | assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2036 | |
2037 | Known.Zero |= ~InMask; |
2038 | break; |
2039 | } |
2040 | case ISD::EXTRACT_VECTOR_ELT: { |
2041 | SDValue Src = Op.getOperand(0); |
2042 | SDValue Idx = Op.getOperand(1); |
2043 | ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount(); |
2044 | unsigned EltBitWidth = Src.getScalarValueSizeInBits(); |
2045 | |
2046 | if (SrcEltCnt.isScalable()) |
2047 | return false; |
2048 | |
2049 | // Demand the bits from every vector element without a constant index. |
2050 | unsigned NumSrcElts = SrcEltCnt.getFixedValue(); |
2051 | APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); |
2052 | if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) |
2053 | if (CIdx->getAPIntValue().ult(NumSrcElts)) |
2054 | DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue()); |
2055 | |
2056 | // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2057 | // anything about the extended bits.
2058 | APInt DemandedSrcBits = DemandedBits; |
2059 | if (BitWidth > EltBitWidth) |
2060 | DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth); |
2061 | |
2062 | if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO, |
2063 | Depth + 1)) |
2064 | return true; |
2065 | |
2066 | // Attempt to avoid multi-use ops if we don't need anything from them. |
2067 | if (!DemandedSrcBits.isAllOnesValue() || |
2068 | !DemandedSrcElts.isAllOnesValue()) { |
2069 | if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( |
2070 | Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) { |
2071 | SDValue NewOp = |
2072 | TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx); |
2073 | return TLO.CombineTo(Op, NewOp); |
2074 | } |
2075 | } |
2076 | |
2077 | Known = Known2; |
2078 | if (BitWidth > EltBitWidth) |
2079 | Known = Known.anyext(BitWidth); |
2080 | break; |
2081 | } |
2082 | case ISD::BITCAST: { |
2083 | SDValue Src = Op.getOperand(0); |
2084 | EVT SrcVT = Src.getValueType(); |
2085 | unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); |
2086 | |
2087 | // If this is an FP->Int bitcast and if the sign bit is the only |
2088 | // thing demanded, turn this into a FGETSIGN. |
2089 | if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() && |
2090 | DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) && |
2091 | SrcVT.isFloatingPoint()) { |
2092 | bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT); |
2093 | bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); |
2094 | if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 && |
2095 | SrcVT != MVT::f128) { |
2096 | // Cannot eliminate/lower SHL for f128 yet. |
2097 | EVT Ty = OpVTLegal ? VT : MVT::i32; |
2098 | // Make a FGETSIGN + SHL to move the sign bit into the appropriate |
2099 | // place. We expect the SHL to be eliminated by other optimizations. |
2100 | SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src); |
2101 | unsigned OpVTSizeInBits = Op.getValueSizeInBits(); |
2102 | if (!OpVTLegal && OpVTSizeInBits > 32) |
2103 | Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign); |
2104 | unsigned ShVal = Op.getValueSizeInBits() - 1; |
2105 | SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT); |
2106 | return TLO.CombineTo(Op, |
2107 | TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt)); |
2108 | } |
2109 | } |
2110 | |
2111 | // Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
2112 | // Demand the elt/bit if any of the original elts/bits are demanded. |
2113 | // TODO - bigendian once we have test coverage. |
2114 | if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && |
2115 | TLO.DAG.getDataLayout().isLittleEndian()) { |
2116 | unsigned Scale = BitWidth / NumSrcEltBits; |
2117 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
2118 | APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); |
2119 | APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); |
2120 | for (unsigned i = 0; i != Scale; ++i) { |
2121 | unsigned Offset = i * NumSrcEltBits; |
2122 | APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); |
2123 | if (!Sub.isNullValue()) { |
2124 | DemandedSrcBits |= Sub; |
2125 | for (unsigned j = 0; j != NumElts; ++j) |
2126 | if (DemandedElts[j]) |
2127 | DemandedSrcElts.setBit((j * Scale) + i); |
2128 | } |
2129 | } |
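     | // [Editorial note, not in the LLVM source: e.g. for
     | // (v2i64 bitcast (v4i32 X)) on a little-endian target, Scale = 2, and
     | // demanding bits [32,63] of i64 element j maps to demanding source
     | // element 2*j + 1, with the demand landing in bits [0,31] of that
     | // element.]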
2130 | |
2131 | APInt KnownSrcUndef, KnownSrcZero; |
2132 | if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef, |
2133 | KnownSrcZero, TLO, Depth + 1)) |
2134 | return true; |
2135 | |
2136 | KnownBits KnownSrcBits; |
2137 | if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, |
2138 | KnownSrcBits, TLO, Depth + 1)) |
2139 | return true; |
2140 | } else if ((NumSrcEltBits % BitWidth) == 0 && |
2141 | TLO.DAG.getDataLayout().isLittleEndian()) { |
2142 | unsigned Scale = NumSrcEltBits / BitWidth; |
2143 | unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; |
2144 | APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); |
2145 | APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); |
2146 | for (unsigned i = 0; i != NumElts; ++i) |
2147 | if (DemandedElts[i]) { |
2148 | unsigned Offset = (i % Scale) * BitWidth; |
2149 | DemandedSrcBits.insertBits(DemandedBits, Offset); |
2150 | DemandedSrcElts.setBit(i / Scale); |
2151 | } |
2152 | |
2153 | if (SrcVT.isVector()) { |
2154 | APInt KnownSrcUndef, KnownSrcZero; |
2155 | if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef, |
2156 | KnownSrcZero, TLO, Depth + 1)) |
2157 | return true; |
2158 | } |
2159 | |
2160 | KnownBits KnownSrcBits; |
2161 | if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, |
2162 | KnownSrcBits, TLO, Depth + 1)) |
2163 | return true; |
2164 | } |
2165 | |
2166 | // If this is a bitcast, let computeKnownBits handle it. Only do this on a |
2167 | // recursive call where Known may be useful to the caller. |
2168 | if (Depth > 0) { |
2169 | Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); |
2170 | return false; |
2171 | } |
2172 | break; |
2173 | } |
2174 | case ISD::ADD: |
2175 | case ISD::MUL: |
2176 | case ISD::SUB: { |
2177 | // Add, Sub, and Mul don't demand any bits in positions beyond that |
2178 | // of the highest bit demanded of them. |
2179 | SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); |
2180 | SDNodeFlags Flags = Op.getNode()->getFlags(); |
2181 | unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros(); |
2182 | APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); |
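     | // [Editorial note, not in the LLVM source: e.g. if only the low 8 bits
     | // of an i32 add are demanded, DemandedBitsLZ = 24 and LoMask covers bits
     | // [0,7]; carries propagate strictly upward, so the operands' bits above
     | // bit 7 can never affect the demanded result.]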
2183 | if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, |
2184 | Depth + 1) || |
2185 | SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, |
2186 | Depth + 1) || |
2187 | // See if the operation should be performed at a smaller bit width. |
2188 | ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { |
2189 | if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { |
2190 | // Disable the nsw and nuw flags. We can no longer guarantee that we |
2191 | // won't wrap after simplification. |
2192 | Flags.setNoSignedWrap(false); |
2193 | Flags.setNoUnsignedWrap(false); |
2194 | SDValue NewOp = |
2195 | TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); |
2196 | return TLO.CombineTo(Op, NewOp); |
2197 | } |
2198 | return true; |
2199 | } |
2200 | |
2201 | // Attempt to avoid multi-use ops if we don't need anything from them. |
2202 | if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { |
2203 | SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( |
2204 | Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); |
2205 | SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( |
2206 | Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1); |
2207 | if (DemandedOp0 || DemandedOp1) { |
2208 | Flags.setNoSignedWrap(false); |
2209 | Flags.setNoUnsignedWrap(false); |
2210 | Op0 = DemandedOp0 ? DemandedOp0 : Op0; |
2211 | Op1 = DemandedOp1 ? DemandedOp1 : Op1; |
2212 | SDValue NewOp = |
2213 | TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); |
2214 | return TLO.CombineTo(Op, NewOp); |
2215 | } |
2216 | } |
2217 | |
2218 | // If we have a constant operand, we may be able to turn it into -1 if we |
2219 | // do not demand the high bits. This can make the constant smaller to |
2220 | // encode, allow more general folding, or match specialized instruction |
2221 | // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that |
2222 | // is probably not useful (and could be detrimental). |
2223 | ConstantSDNode *C = isConstOrConstSplat(Op1); |
2224 | APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ); |
2225 | if (C && !C->isAllOnesValue() && !C->isOne() && |
2226 | (C->getAPIntValue() | HighMask).isAllOnesValue()) { |
2227 | SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); |
2228 | // Disable the nsw and nuw flags. We can no longer guarantee that we |
2229 | // won't wrap after simplification. |
2230 | Flags.setNoSignedWrap(false); |
2231 | Flags.setNoUnsignedWrap(false); |
2232 | SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); |
2233 | return TLO.CombineTo(Op, NewOp); |
2234 | } |
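     | // [Editorial note, not in the LLVM source: e.g. with only the low 8 bits
     | // of (add X, 0xFF) demanded in i32, 0xFF | HighMask is all-ones, so the
     | // constant becomes -1; modulo 2^8 both constants add the same value, and
     | // -1 is often cheaper to encode or enables folds like x86's 'blsr'.]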
2235 | |
2236 | LLVM_FALLTHROUGH;
2237 | } |
2238 | default: |
2239 | if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { |
2240 | if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts, |
2241 | Known, TLO, Depth)) |
2242 | return true; |
2243 | break; |
2244 | } |
2245 | |
2246 | // Just use computeKnownBits to compute output bits. |
2247 | Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); |
2248 | break; |
2249 | } |
2250 | |
2251 | // If we know the value of all of the demanded bits, return this as a |
2252 | // constant. |
2253 | if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) { |
2254 | // Avoid folding to a constant if any OpaqueConstant is involved. |
2255 | const SDNode *N = Op.getNode(); |
2256 | for (SDNodeIterator I = SDNodeIterator::begin(N), |
2257 | E = SDNodeIterator::end(N); |
2258 | I != E; ++I) { |
2259 | SDNode *Op = *I; |
2260 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) |
2261 | if (C->isOpaque()) |
2262 | return false; |
2263 | } |
2264 | if (VT.isInteger()) |
2265 | return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); |
2266 | if (VT.isFloatingPoint()) |
2267 | return TLO.CombineTo( |
2268 | Op, |
2269 | TLO.DAG.getConstantFP( |
2270 | APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT)); |
2271 | } |
2272 | |
2273 | return false; |
2274 | } |
2275 | |
2276 | bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, |
2277 | const APInt &DemandedElts, |
2278 | APInt &KnownUndef, |
2279 | APInt &KnownZero, |
2280 | DAGCombinerInfo &DCI) const { |
2281 | SelectionDAG &DAG = DCI.DAG; |
2282 | TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), |
2283 | !DCI.isBeforeLegalizeOps()); |
2284 | |
2285 | bool Simplified = |
2286 | SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); |
2287 | if (Simplified) { |
2288 | DCI.AddToWorklist(Op.getNode()); |
2289 | DCI.CommitTargetLoweringOpt(TLO); |
2290 | } |
2291 | |
2292 | return Simplified; |
2293 | } |
2294 | |
2295 | /// Given a vector binary operation and known undefined elements for each input |
2296 | /// operand, compute whether each element of the output is undefined. |
2297 | static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, |
2298 | const APInt &UndefOp0, |
2299 | const APInt &UndefOp1) { |
2300 | EVT VT = BO.getValueType(); |
2301 | assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2302 | "Vector binop only");
2303 | |
2304 | EVT EltVT = VT.getVectorElementType(); |
2305 | unsigned NumElts = VT.getVectorNumElements(); |
2306 | assert(UndefOp0.getBitWidth() == NumElts &&
2307 | UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2308 | |
2309 | auto getUndefOrConstantElt = [&](SDValue V, unsigned Index, |
2310 | const APInt &UndefVals) { |
2311 | if (UndefVals[Index]) |
2312 | return DAG.getUNDEF(EltVT); |
2313 | |
2314 | if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { |
2315 | // Try hard to make sure that the getNode() call is not creating temporary |
2316 | // nodes. Ignore opaque integers because they do not constant fold. |
2317 | SDValue Elt = BV->getOperand(Index); |
2318 | auto *C = dyn_cast<ConstantSDNode>(Elt); |
2319 | if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque())) |
2320 | return Elt; |
2321 | } |
2322 | |
2323 | return SDValue(); |
2324 | }; |
2325 | |
2326 | APInt KnownUndef = APInt::getNullValue(NumElts); |
2327 | for (unsigned i = 0; i != NumElts; ++i) { |
2328 | // If both inputs for this element are either constant or undef and match |
2329 | // the element type, compute the constant/undef result for this element of |
2330 | // the vector. |
2331 | // TODO: Ideally we would use FoldConstantArithmetic() here, but that does |
2332 | // not handle FP constants. The code within getNode() should be refactored |
2333 | // to avoid the danger of creating a bogus temporary node here. |
2334 | SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0); |
2335 | SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1); |
2336 | if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT) |
2337 | if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef()) |
2338 | KnownUndef.setBit(i); |
2339 | } |
2340 | return KnownUndef; |
2341 | } |
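     | // Note: this helper is used by SimplifyDemandedVectorElts below to merge the |
     | // per-operand undef masks of generic binops (see the ISD::OR / ISD::ADD / |
     | // ... case). |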
2342 | |
2343 | bool TargetLowering::SimplifyDemandedVectorElts( |
2344 | SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef, |
2345 | APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, |
2346 | bool AssumeSingleUse) const { |
2347 | EVT VT = Op.getValueType(); |
2348 | unsigned Opcode = Op.getOpcode(); |
2349 | APInt DemandedElts = OriginalDemandedElts; |
2350 | unsigned NumElts = DemandedElts.getBitWidth(); |
2351 |   assert(VT.isVector() && "Expected vector op"); |
2352 | |
2353 | KnownUndef = KnownZero = APInt::getNullValue(NumElts); |
2354 | |
2355 | // TODO: For now we assume we know nothing about scalable vectors. |
2356 | if (VT.isScalableVector()) |
2357 | return false; |
2358 | |
2359 |   assert(VT.getVectorNumElements() == NumElts && |
2360 |          "Mask size mismatches value type element count!"); |
2361 | |
2362 | // Undef operand. |
2363 | if (Op.isUndef()) { |
2364 | KnownUndef.setAllBits(); |
2365 | return false; |
2366 | } |
2367 | |
2368 | // If Op has other users, assume that all elements are needed. |
2369 | if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) |
2370 | DemandedElts.setAllBits(); |
2371 | |
2372 | // Not demanding any elements from Op. |
2373 | if (DemandedElts == 0) { |
2374 | KnownUndef.setAllBits(); |
2375 | return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); |
2376 | } |
2377 | |
2378 | // Limit search depth. |
2379 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
2380 | return false; |
2381 | |
2382 | SDLoc DL(Op); |
2383 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
2384 | |
2385 | // Helper for demanding the specified elements and all the bits of both binary |
2386 | // operands. |
2387 | auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) { |
2388 | SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts, |
2389 | TLO.DAG, Depth + 1); |
2390 | SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts, |
2391 | TLO.DAG, Depth + 1); |
2392 | if (NewOp0 || NewOp1) { |
2393 | SDValue NewOp = TLO.DAG.getNode( |
2394 | Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1); |
2395 | return TLO.CombineTo(Op, NewOp); |
2396 | } |
2397 | return false; |
2398 | }; |
2399 | |
2400 | switch (Opcode) { |
2401 | case ISD::SCALAR_TO_VECTOR: { |
2402 | if (!DemandedElts[0]) { |
2403 | KnownUndef.setAllBits(); |
2404 | return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); |
2405 | } |
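     |     // Only element 0 of a SCALAR_TO_VECTOR is defined; every higher element |
     |     // is undef, so with NumElts == 4 elements 1-3 are marked known-undef. |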
2406 | KnownUndef.setHighBits(NumElts - 1); |
2407 | break; |
2408 | } |
2409 | case ISD::BITCAST: { |
2410 | SDValue Src = Op.getOperand(0); |
2411 | EVT SrcVT = Src.getValueType(); |
2412 | |
2413 | // We only handle vectors here. |
2414 | // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits? |
2415 | if (!SrcVT.isVector()) |
2416 | break; |
2417 | |
2418 | // Fast handling of 'identity' bitcasts. |
2419 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
2420 | if (NumSrcElts == NumElts) |
2421 | return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, |
2422 | KnownZero, TLO, Depth + 1); |
2423 | |
2424 | APInt SrcZero, SrcUndef; |
2425 | APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts); |
2426 | |
2427 |     // Bitcast from a 'large element' src vector to a 'small element' vector: |
2428 |     // we must demand a source element if any DemandedElt maps to it. |
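     |     // Illustrative example: bitcasting v2i64 -> v4i32 gives Scale == 2, so |
     |     // demanding i32 element 3 demands i64 source element 3 / 2 == 1. |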
2429 | if ((NumElts % NumSrcElts) == 0) { |
2430 | unsigned Scale = NumElts / NumSrcElts; |
2431 | for (unsigned i = 0; i != NumElts; ++i) |
2432 | if (DemandedElts[i]) |
2433 | SrcDemandedElts.setBit(i / Scale); |
2434 | |
2435 | if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, |
2436 | TLO, Depth + 1)) |
2437 | return true; |
2438 | |
2439 | // Try calling SimplifyDemandedBits, converting demanded elts to the bits |
2440 | // of the large element. |
2441 | // TODO - bigendian once we have test coverage. |
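     |       // e.g. for v2i64 -> v4i32 on little-endian, i32 element i maps to bits |
     |       // [(i % 2) * 32, (i % 2) * 32 + 32) of its i64 source element, so |
     |       // element 3 maps to bits 32-63 of i64 element 1. |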
2442 | if (TLO.DAG.getDataLayout().isLittleEndian()) { |
2443 | unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits(); |
2444 | APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits); |
2445 | for (unsigned i = 0; i != NumElts; ++i) |
2446 | if (DemandedElts[i]) { |
2447 | unsigned Ofs = (i % Scale) * EltSizeInBits; |
2448 | SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits); |
2449 | } |
2450 | |
2451 | KnownBits Known; |
2452 | if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known, |
2453 | TLO, Depth + 1)) |
2454 | return true; |
2455 | } |
2456 | |
2457 |       // If the src element is zero/undef then all the output elements it covers |
2458 |       // will be too - only demanded elements are guaranteed to be correct. |
2459 | for (unsigned i = 0; i != NumSrcElts; ++i) { |
2460 | if (SrcDemandedElts[i]) { |
2461 | if (SrcZero[i]) |
2462 | KnownZero.setBits(i * Scale, (i + 1) * Scale); |
2463 | if (SrcUndef[i]) |
2464 | KnownUndef.setBits(i * Scale, (i + 1) * Scale); |
2465 | } |
2466 | } |
2467 | } |
2468 | |
2469 |     // Bitcast from a 'small element' src vector to a 'large element' vector: |
2470 |     // we demand all the smaller source elements covered by the larger demanded |
2471 |     // element of this vector. |
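     |     // Illustrative example: bitcasting v4i32 -> v2i64 gives Scale == 2, so |
     |     // demanding i64 element 1 demands i32 source elements 2 and 3. |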
2472 | if ((NumSrcElts % NumElts) == 0) { |
2473 | unsigned Scale = NumSrcElts / NumElts; |
2474 | for (unsigned i = 0; i != NumElts; ++i) |
2475 | if (DemandedElts[i]) |
2476 | SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale); |
2477 | |
2478 | if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, |
2479 | TLO, Depth + 1)) |
2480 | return true; |
2481 | |
2482 | // If all the src elements covering an output element are zero/undef, then |
2483 | // the output element will be as well, assuming it was demanded. |
2484 | for (unsigned i = 0; i != NumElts; ++i) { |
2485 | if (DemandedElts[i]) { |
2486 | if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue()) |
2487 | KnownZero.setBit(i); |
2488 | if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue()) |
2489 | KnownUndef.setBit(i); |
2490 | } |
2491 | } |
2492 | } |
2493 | break; |
2494 | } |
2495 | case ISD::BUILD_VECTOR: { |
2496 | // Check all elements and simplify any unused elements with UNDEF. |
2497 | if (!DemandedElts.isAllOnesValue()) { |
2498 | // Don't simplify BROADCASTS. |
2499 | if (llvm::any_of(Op->op_values(), |
2500 | [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { |
2501 | SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end()); |
2502 | bool Updated = false; |
2503 | for (unsigned i = 0; i != NumElts; ++i) { |
2504 | if (!DemandedElts[i] && !Ops[i].isUndef()) { |
2505 | Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType()); |
2506 | KnownUndef.setBit(i); |
2507 | Updated = true; |
2508 | } |
2509 | } |
2510 | if (Updated) |
2511 | return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops)); |
2512 | } |
2513 | } |
2514 | for (unsigned i = 0; i != NumElts; ++i) { |
2515 | SDValue SrcOp = Op.getOperand(i); |
2516 | if (SrcOp.isUndef()) { |
2517 | KnownUndef.setBit(i); |
2518 | } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() && |
2519 | (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) { |
2520 | KnownZero.setBit(i); |
2521 | } |
2522 | } |
2523 | break; |
2524 | } |
2525 | case ISD::CONCAT_VECTORS: { |
2526 | EVT SubVT = Op.getOperand(0).getValueType(); |
2527 | unsigned NumSubVecs = Op.getNumOperands(); |
2528 | unsigned NumSubElts = SubVT.getVectorNumElements(); |
2529 | for (unsigned i = 0; i != NumSubVecs; ++i) { |
2530 | SDValue SubOp = Op.getOperand(i); |
2531 | APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts); |
2532 | APInt SubUndef, SubZero; |
2533 | if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO, |
2534 | Depth + 1)) |
2535 | return true; |
2536 | KnownUndef.insertBits(SubUndef, i * NumSubElts); |
2537 | KnownZero.insertBits(SubZero, i * NumSubElts); |
2538 | } |
2539 | break; |
2540 | } |
2541 | case ISD::INSERT_SUBVECTOR: { |
2542 |     // Demand any elements from the subvector and the remainder from the src |
2543 |     // it is inserted into. |
2544 | SDValue Src = Op.getOperand(0); |
2545 | SDValue Sub = Op.getOperand(1); |
2546 | uint64_t Idx = Op.getConstantOperandVal(2); |
2547 | unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); |
2548 | APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); |
2549 | APInt DemandedSrcElts = DemandedElts; |
2550 | DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); |
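     |     // e.g. with NumElts == 8, NumSubElts == 2 and Idx == 4, DemandedSubElts |
     |     // holds bits 4-5 of DemandedElts, and those same bits are cleared in |
     |     // DemandedSrcElts because the subvector supplies them. |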
2551 | |
2552 | APInt SubUndef, SubZero; |
2553 | if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO, |
2554 | Depth + 1)) |
2555 | return true; |
2556 | |
2557 | // If none of the src operand elements are demanded, replace it with undef. |
2558 | if (!DemandedSrcElts && !Src.isUndef()) |
2559 | return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
2560 | TLO.DAG.getUNDEF(VT), Sub, |
2561 | Op.getOperand(2))); |
2562 | |
2563 | if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero, |
2564 | TLO, Depth + 1)) |
2565 | return true; |
2566 | KnownUndef.insertBits(SubUndef, Idx); |
2567 | KnownZero.insertBits(SubZero, Idx); |
2568 | |
2569 | // Attempt to avoid multi-use ops if we don't need anything from them. |
2570 | if (!DemandedSrcElts.isAllOnesValue() || |
2571 | !DemandedSubElts.isAllOnesValue()) { |
2572 | SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( |
2573 | Src, DemandedSrcElts, TLO.DAG, Depth + 1); |
2574 | SDValue NewSub = SimplifyMultipleUseDemandedVectorElts( |
2575 | Sub, DemandedSubElts, TLO.DAG, Depth + 1); |
2576 | if (NewSrc || NewSub) { |
2577 | NewSrc = NewSrc ? NewSrc : Src; |
2578 | NewSub = NewSub ? NewSub : Sub; |
2579 | SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc, |
2580 | NewSub, Op.getOperand(2)); |
2581 | return TLO.CombineTo(Op, NewOp); |
2582 | } |
2583 | } |
2584 | break; |
2585 | } |
2586 | case ISD::EXTRACT_SUBVECTOR: { |
2587 | // Offset the demanded elts by the subvector index. |
2588 | SDValue Src = Op.getOperand(0); |
2589 | if (Src.getValueType().isScalableVector()) |
2590 | break; |
2591 | uint64_t Idx = Op.getConstantOperandVal(1); |
2592 | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
2593 | APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); |
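     |     // e.g. extracting v2i32 from v8i32 at Idx == 2: the demanded mask is |
     |     // zero-extended to 8 bits and shifted left by 2, so demanding output |
     |     // elements 0 and 1 demands source elements 2 and 3. |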
2594 | |
2595 | APInt SrcUndef, SrcZero; |
2596 | if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, |
2597 | Depth + 1)) |
2598 | return true; |
2599 | KnownUndef = SrcUndef.extractBits(NumElts, Idx); |
2600 | KnownZero = SrcZero.extractBits(NumElts, Idx); |
2601 | |
2602 | // Attempt to avoid multi-use ops if we don't need anything from them. |
2603 | if (!DemandedElts.isAllOnesValue()) { |
2604 | SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( |
2605 | Src, DemandedSrcElts, TLO.DAG, Depth + 1); |
2606 | if (NewSrc) { |
2607 | SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc, |
2608 | Op.getOperand(1)); |
2609 | return TLO.CombineTo(Op, NewOp); |
2610 | } |
2611 | } |
2612 | break; |
2613 | } |
2614 | case ISD::INSERT_VECTOR_ELT: { |
2615 | SDValue Vec = Op.getOperand(0); |
2616 | SDValue Scl = Op.getOperand(1); |
2617 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
2618 | |
2619 | // For a legal, constant insertion index, if we don't need this insertion |
2620 | // then strip it, else remove it from the demanded elts. |
2621 | if (CIdx && CIdx->getAPIntValue().ult(NumElts)) { |
2622 | unsigned Idx = CIdx->getZExtValue(); |
2623 | if (!DemandedElts[Idx]) |
2624 | return TLO.CombineTo(Op, Vec); |
2625 | |
2626 | APInt DemandedVecElts(DemandedElts); |
2627 | DemandedVecElts.clearBit(Idx); |
2628 | if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef, |
2629 | KnownZero, TLO, Depth + 1)) |
2630 | return true; |
2631 | |
2632 | KnownUndef.setBitVal(Idx, Scl.isUndef()); |
2633 | |
2634 | KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl)); |
2635 | break; |
2636 | } |
2637 | |
2638 | APInt VecUndef, VecZero; |
2639 | if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO, |
2640 | Depth + 1)) |
2641 | return true; |
2642 | // Without knowing the insertion index we can't set KnownUndef/KnownZero. |
2643 | break; |
2644 | } |
2645 | case ISD::VSELECT: { |
2646 | // Try to transform the select condition based on the current demanded |
2647 | // elements. |
2648 | // TODO: If a condition element is undef, we can choose from one arm of the |
2649 | // select (and if one arm is undef, then we can propagate that to the |
2650 | // result). |
2651 | // TODO - add support for constant vselect masks (see IR version of this). |
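     |     // A result element is only known zero (or undef) when the corresponding |
     |     // element of *both* arms is, since the condition may pick either arm per |
     |     // element - hence the ANDs of the LHS/RHS masks below. |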
2652 | APInt UnusedUndef, UnusedZero; |
2653 | if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef, |
2654 | UnusedZero, TLO, Depth + 1)) |
2655 | return true; |
2656 | |
2657 | // See if we can simplify either vselect operand. |
2658 | APInt DemandedLHS(DemandedElts); |
2659 | APInt DemandedRHS(DemandedElts); |
2660 | APInt UndefLHS, ZeroLHS; |
2661 | APInt UndefRHS, ZeroRHS; |
2662 | if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS, |
2663 | ZeroLHS, TLO, Depth + 1)) |
2664 | return true; |
2665 | if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS, |
2666 | ZeroRHS, TLO, Depth + 1)) |
2667 | return true; |
2668 | |
2669 | KnownUndef = UndefLHS & UndefRHS; |
2670 | KnownZero = ZeroLHS & ZeroRHS; |
2671 | break; |
2672 | } |
2673 | case ISD::VECTOR_SHUFFLE: { |
2674 | ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); |
2675 | |
2676 |     // Collect demanded elements from shuffle operands. |
2677 | APInt DemandedLHS(NumElts, 0); |
2678 | APInt DemandedRHS(NumElts, 0); |
2679 | for (unsigned i = 0; i != NumElts; ++i) { |
2680 | int M = ShuffleMask[i]; |
2681 | if (M < 0 || !DemandedElts[i]) |
2682 | continue; |
2683 |       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range"); |
2684 | if (M < (int)NumElts) |
2685 | DemandedLHS.setBit(M); |
2686 | else |
2687 | DemandedRHS.setBit(M - NumElts); |
2688 | } |
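     |     // e.g. with NumElts == 4 and mask <0, 5, 2, 7>, demanding result elements |
     |     // 1 and 3 demands RHS elements 1 (5 - 4) and 3 (7 - 4), while demanding |
     |     // elements 0 and 2 demands LHS elements 0 and 2. |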
2689 | |
2690 | // See if we can simplify either shuffle operand. |
2691 | APInt UndefLHS, ZeroLHS; |
2692 | APInt UndefRHS, ZeroRHS; |
2693 | if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS, |
2694 | ZeroLHS, TLO, Depth + 1)) |
2695 | return true; |
2696 | if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS, |
2697 | ZeroRHS, TLO, Depth + 1)) |
2698 | return true; |
2699 | |
2700 | // Simplify mask using undef elements from LHS/RHS. |
2701 | bool Updated = false; |
2702 | bool IdentityLHS = true, IdentityRHS = true; |
2703 | SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end()); |
2704 | for (unsigned i = 0; i != NumElts; ++i) { |
2705 | int &M = NewMask[i]; |
2706 | if (M < 0) |
2707 | continue; |
2708 | if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) || |
2709 | (M >= (int)NumElts && UndefRHS[M - NumElts])) { |
2710 | Updated = true; |
2711 | M = -1; |
2712 | } |
2713 | IdentityLHS &= (M < 0) || (M == (int)i); |
2714 | IdentityRHS &= (M < 0) || ((M - NumElts) == i); |
2715 | } |
2716 | |
2717 |     // Update legal shuffle masks based on demanded elements if doing so won't |
2718 |     // reduce to an identity mask, which can cause premature removal of the shuffle. |
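     |     // (An LHS identity is a mask such as <0, 1, -1, 3>, where every |
     |     // non-sentinel index selects the matching LHS element.) |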
2719 | if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) { |
2720 | SDValue LegalShuffle = |
2721 | buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1), |
2722 | NewMask, TLO.DAG); |
2723 | if (LegalShuffle) |
2724 | return TLO.CombineTo(Op, LegalShuffle); |
2725 | } |
2726 | |
2727 | // Propagate undef/zero elements from LHS/RHS. |
2728 | for (unsigned i = 0; i != NumElts; ++i) { |
2729 | int M = ShuffleMask[i]; |
2730 | if (M < 0) { |
2731 | KnownUndef.setBit(i); |
2732 | } else if (M < (int)NumElts) { |
2733 | if (UndefLHS[M]) |
2734 | KnownUndef.setBit(i); |
2735 | if (ZeroLHS[M]) |
2736 | KnownZero.setBit(i); |
2737 | } else { |
2738 | if (UndefRHS[M - NumElts]) |
2739 | KnownUndef.setBit(i); |
2740 | if (ZeroRHS[M - NumElts]) |
2741 | KnownZero.setBit(i); |
2742 | } |
2743 | } |
2744 | break; |
2745 | } |
2746 | case ISD::ANY_EXTEND_VECTOR_INREG: |
2747 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
2748 | case ISD::ZERO_EXTEND_VECTOR_INREG: { |
2749 | APInt SrcUndef, SrcZero; |
2750 | SDValue Src = Op.getOperand(0); |
2751 | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
2752 | APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); |
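     |     // *_EXTEND_VECTOR_INREG reads the low NumElts elements of a wider source; |
     |     // e.g. for an in-register zext v16i8 -> v8i16, result element i comes from |
     |     // source element i, so DemandedElts is simply zero-extended to 16 bits. |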
2753 | if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, |
2754 | Depth + 1)) |
2755 | return true; |
2756 | KnownZero = SrcZero.zextOrTrunc(NumElts); |
2757 | KnownUndef = SrcUndef.zextOrTrunc(NumElts); |
2758 | |
2759 | if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG && |
2760 | Op.getValueSizeInBits() == Src.getValueSizeInBits() && |
2761 | DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) { |
2762 | // aext - if we just need the bottom element then we can bitcast. |
2763 | return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); |
2764 | } |
2765 | |
2766 | if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) { |
2767 | // zext(undef) upper bits are guaranteed to be zero. |
2768 | if (DemandedElts.isSubsetOf(KnownUndef)) |
2769 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
2770 | KnownUndef.clearAllBits(); |
2771 | } |
2772 | break; |
2773 | } |
2774 | |
2775 | // TODO: There are more binop opcodes that could be handled here - MIN, |
2776 | // MAX, saturated math, etc. |
2777 | case ISD::OR: |
2778 | case ISD::XOR: |
2779 | case ISD::ADD: |
2780 | case ISD::SUB: |
2781 | case ISD::FADD: |
2782 | case ISD::FSUB: |
2783 | case ISD::FMUL: |
2784 | case ISD::FDIV: |
2785 | case ISD::FREM: { |
2786 | SDValue Op0 = Op.getOperand(0); |
2787 | SDValue Op1 = Op.getOperand(1); |
2788 | |
2789 | APInt UndefRHS, ZeroRHS; |
2790 | if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO, |
2791 | Depth + 1)) |
2792 | return true; |
2793 | APInt UndefLHS, ZeroLHS; |
2794 | if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, |
2795 | Depth + 1)) |
2796 | return true; |
2797 | |
2798 | KnownZero = ZeroLHS & ZeroRHS; |
2799 | KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS); |
2800 | |
2801 | // Attempt to avoid multi-use ops if we don't need anything from them. |
2802 | // TODO - use KnownUndef to relax the demandedelts? |
2803 | if (!DemandedElts.isAllOnesValue()) |
2804 | if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) |
2805 | return true; |
2806 | break; |
2807 | } |
2808 | case ISD::SHL: |
2809 | case ISD::SRL: |
2810 | case ISD::SRA: |
2811 | case ISD::ROTL: |
2812 | case ISD::ROTR: { |
2813 | SDValue Op0 = Op.getOperand(0); |
2814 | SDValue Op1 = Op.getOperand(1); |
2815 | |
2816 | APInt UndefRHS, ZeroRHS; |
2817 | if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO, |
2818 | Depth + 1)) |
2819 | return true; |
2820 | APInt UndefLHS, ZeroLHS; |
2821 | if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, |
2822 | Depth + 1)) |
2823 | return true; |
2824 | |
2825 | KnownZero = ZeroLHS; |
2826 | KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop? |
2827 | |
2828 | // Attempt to avoid multi-use ops if we don't need anything from them. |
2829 | // TODO - use KnownUndef to relax the demandedelts? |
2830 | if (!DemandedElts.isAllOnesValue()) |
2831 | if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) |
2832 | return true; |
2833 | break; |
2834 | } |
2835 | case ISD::MUL: |
2836 | case ISD::AND: { |
2837 | SDValue Op0 = Op.getOperand(0); |
2838 | SDValue Op1 = Op.getOperand(1); |
2839 | |
2840 | APInt SrcUndef, SrcZero; |
2841 | if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO, |
2842 | Depth + 1)) |
2843 | return true; |
2844 | if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero, |
2845 | TLO, Depth + 1)) |
2846 | return true; |
2847 | |
2848 | // If either side has a zero element, then the result element is zero, even |
2849 | // if the other is an UNDEF. |
2850 | // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros |
2851 | // and then handle 'and' nodes with the rest of the binop opcodes. |
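     |     // e.g. and(undef, 0) and mul(undef, 0) are both 0, not undef, so any lane |
     |     // known zero is removed from KnownUndef below. |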
2852 | KnownZero |= SrcZero; |
2853 | KnownUndef &= SrcUndef; |
2854 | KnownUndef &= ~KnownZero; |
2855 | |
2856 | // Attempt to avoid multi-use ops if we don't need anything from them. |
2857 | // TODO - use KnownUndef to relax the demandedelts? |
2858 | if (!DemandedElts.isAllOnesValue()) |
2859 | if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) |
2860 | return true; |
2861 | break; |
2862 | } |
2863 | case ISD::TRUNCATE: |
2864 | case ISD::SIGN_EXTEND: |
2865 | case ISD::ZERO_EXTEND: |
2866 | if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, |
2867 | KnownZero, TLO, Depth + 1)) |
2868 | return true; |
2869 | |
2870 | if (Op.getOpcode() == ISD::ZERO_EXTEND) { |
2871 | // zext(undef) upper bits are guaranteed to be zero. |
2872 | if (DemandedElts.isSubsetOf(KnownUndef)) |
2873 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
2874 | KnownUndef.clearAllBits(); |
2875 | } |
2876 | break; |
2877 | default: { |
2878 | if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { |
2879 | if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef, |
2880 | KnownZero, TLO, Depth)) |
2881 | return true; |
2882 | } else { |
2883 | KnownBits Known; |
2884 | APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits); |
2885 | if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known, |
2886 | TLO, Depth, AssumeSingleUse)) |
2887 | return true; |
2888 | } |
2889 | break; |
2890 | } |
2891 | } |
2892 |   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero"); |
2893 | |
2894 | // Constant fold all undef cases. |
2895 | // TODO: Handle zero cases as well. |
2896 | if (DemandedElts.isSubsetOf(KnownUndef)) |
2897 | return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); |
2898 | |
2899 | return false; |
2900 | } |
2901 | |
2902 | /// Determine which of the bits specified in Mask are known to be either zero or |
2903 | /// one and return them in the Known. |
2904 | void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, |
2905 | KnownBits &Known, |
2906 | const APInt &DemandedElts, |
2907 | const SelectionDAG &DAG, |
2908 | unsigned Depth) const { |
2909 |   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || |
2910 |           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
2911 |           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || |
2912 |           Op.getOpcode() == ISD::INTRINSIC_VOID) && |
2913 |          "Should use MaskedValueIsZero if you don't know whether Op" |
2914 |          " is a target node!"); |
2915 | Known.resetAll(); |
2916 | } |
2917 | |
2918 | void TargetLowering::computeKnownBitsForTargetInstr( |
2919 | GISelKnownBits &Analysis, Register R, KnownBits &Known, |
2920 | const APInt &DemandedElts, const MachineRegisterInfo &MRI, |
2921 | unsigned Depth) const { |
2922 | Known.resetAll(); |
2923 | } |
2924 | |
2925 | void TargetLowering::computeKnownBitsForFrameIndex( |
2926 | const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const { |
2927 | // The low bits are known zero if the pointer is aligned. |
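     |   // e.g. a frame object with 16-byte alignment has Log2(16) == 4 low address |
     |   // bits known to be zero. |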
2928 | Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx))); |
2929 | } |
2930 | |
2931 | Align TargetLowering::computeKnownAlignForTargetInstr( |
2932 | GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, |
2933 | unsigned Depth) const { |
2934 | return Align(1); |
2935 | } |
2936 | |
2937 | /// This method can be implemented by targets that want to expose additional |
2938 | /// information about sign bits to the DAG Combiner. |
2939 | unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, |
2940 | const APInt &, |
2941 | const SelectionDAG &, |
2942 | unsigned Depth) const { |
2943 |   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || |
2944 |           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
2945 |           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || |
2946 |           Op.getOpcode() == ISD::INTRINSIC_VOID) && |
2947 |          "Should use ComputeNumSignBits if you don't know whether Op" |
2948 |          " is a target node!"); |
2949 | return 1; |
2950 | } |
2951 | |
2952 | unsigned TargetLowering::computeNumSignBitsForTargetInstr( |
2953 | GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, |
2954 | const MachineRegisterInfo &MRI, unsigned Depth) const { |
2955 | return 1; |
2956 | } |
2957 | |
2958 | bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode( |
2959 | SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, |
2960 | TargetLoweringOpt &TLO, unsigned Depth) const { |
2961 |   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || |
2962 |           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
2963 |           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || |
2964 |           Op.getOpcode() == ISD::INTRINSIC_VOID) && |
2965 |          "Should use SimplifyDemandedVectorElts if you don't know whether Op" |
2966 |          " is a target node!"); |
2967 | return false; |
2968 | } |
2969 | |
2970 | bool TargetLowering::SimplifyDemandedBitsForTargetNode( |
2971 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
2972 | KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const { |
2973 |   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || |
2974 |           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
2975 |           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || |
2976 |           Op.getOpcode() == ISD::INTRINSIC_VOID) && |
2977 |          "Should use SimplifyDemandedBits if you don't know whether Op" |
2978 |          " is a target node!"); |
2979 | computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth); |
2980 | return false; |
2981 | } |
2982 | |
2983 | SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( |
2984 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
2985 | SelectionDAG &DAG, unsigned Depth) const { |
2986 |   assert( |
2987 |       (Op.getOpcode() >= ISD::BUILTIN_OP_END || |
2988 |        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
2989 |        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || |
2990 |        Op.getOpcode() == ISD::INTRINSIC_VOID) && |
2991 |       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op" |
2992 |       " is a target node!"); |
2993 | return SDValue(); |
2994 | } |
2995 | |
2996 | SDValue |
2997 | TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, |
2998 | SDValue N1, MutableArrayRef<int> Mask, |
2999 | SelectionDAG &DAG) const { |
3000 | bool LegalMask = isShuffleMaskLegal(Mask, VT); |
3001 | if (!LegalMask) { |
3002 | std::swap(N0, N1); |
3003 | ShuffleVectorSDNode::commuteMask(Mask); |
3004 | LegalMask = isShuffleMaskLegal(Mask, VT); |
3005 | } |
3006 | |
3007 | if (!LegalMask) |
3008 | return SDValue(); |
3009 | |
3010 | return DAG.getVectorShuffle(VT, DL, N0, N1, Mask); |
3011 | } |
3012 | |
3013 | const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const { |
3014 | return nullptr; |
3015 | } |
3016 | |
3017 | bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, |
3018 | const SelectionDAG &DAG, |
3019 | bool SNaN, |
3020 | unsigned Depth) const { |
3021 |   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || |
3022 |           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || |
3023 |           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || |
3024 |           Op.getOpcode() == ISD::INTRINSIC_VOID) && |
3025 |          "Should use isKnownNeverNaN if you don't know whether Op" |
3026 |          " is a target node!"); |
3027 | return false; |
3028 | } |
3029 | |
3030 | // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must |
3031 | // work with truncating build vectors and vectors with elements of less than |
3032 | // 8 bits. |
3033 | bool TargetLowering::isConstTrueVal(const SDNode *N) const { |
3034 | if (!N) |
3035 | return false; |
3036 | |
3037 | APInt CVal; |
3038 | if (auto *CN = dyn_cast<ConstantSDNode>(N)) { |
3039 | CVal = CN->getAPIntValue(); |
3040 | } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) { |
3041 | auto *CN = BV->getConstantSplatNode(); |
3042 | if (!CN) |
3043 | return false; |
3044 | |
3045 | // If this is a truncating build vector, truncate the splat value. |
3046 | // Otherwise, we may fail to match the expected values below. |
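     |     // e.g. a truncating v4i1 build vector may splat an i32 -1; truncating to |
     |     // the 1-bit element width below yields 1, which then satisfies both the |
     |     // ZeroOrOne and ZeroOrNegativeOne checks. |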
3047 | unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits(); |
3048 | CVal = CN->getAPIntValue(); |
3049 | if (BVEltWidth < CVal.getBitWidth()) |
3050 | CVal = CVal.trunc(BVEltWidth); |
3051 | } else { |
3052 | return false; |
3053 | } |
3054 | |
3055 | switch (getBooleanContents(N->getValueType(0))) { |
3056 | case UndefinedBooleanContent: |
3057 | return CVal[0]; |
3058 | case ZeroOrOneBooleanContent: |
3059 | return CVal.isOneValue(); |
3060 | case ZeroOrNegativeOneBooleanContent: |
3061 | return CVal.isAllOnesValue(); |
3062 | } |
3063 | |
3064 |   llvm_unreachable("Invalid boolean contents");
3065 | } |
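     | // A rough worked example of the truncating-splat handling above: for a
     | // <4 x i8> result built as a splat of i32 -1, the splat value truncates
     | // to 0xFF (all ones), so it is "true" under
     | // ZeroOrNegativeOneBooleanContent but not under ZeroOrOneBooleanContent.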
3066 | |
3067 | bool TargetLowering::isConstFalseVal(const SDNode *N) const { |
3068 | if (!N) |
3069 | return false; |
3070 | |
3071 | const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); |
3072 | if (!CN) { |
3073 | const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); |
3074 | if (!BV) |
3075 | return false; |
3076 | |
3077 |     // Only interested in constant splats; we don't care about undef
3078 |     // elements when identifying boolean constants, and getConstantSplatNode
3079 |     // returns null if all ops are undef.
3080 | CN = BV->getConstantSplatNode(); |
3081 | if (!CN) |
3082 | return false; |
3083 | } |
3084 | |
3085 | if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) |
3086 | return !CN->getAPIntValue()[0]; |
3087 | |
3088 | return CN->isNullValue(); |
3089 | } |
3090 | |
3091 | bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, |
3092 | bool SExt) const { |
3093 | if (VT == MVT::i1) |
3094 | return N->isOne(); |
3095 | |
3096 | TargetLowering::BooleanContent Cnt = getBooleanContents(VT); |
3097 | switch (Cnt) { |
3098 | case TargetLowering::ZeroOrOneBooleanContent: |
3099 | // An extended value of 1 is always true, unless its original type is i1, |
3100 | // in which case it will be sign extended to -1. |
3101 | return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1)); |
3102 | case TargetLowering::UndefinedBooleanContent: |
3103 | case TargetLowering::ZeroOrNegativeOneBooleanContent: |
3104 | return N->isAllOnesValue() && SExt; |
3105 | } |
3106 |   llvm_unreachable("Unexpected enumeration.");
3107 | } |
3108 | |
3109 | /// This helper function of SimplifySetCC tries to optimize the comparison when |
3110 | /// either operand of the SetCC node is a bitwise-and instruction. |
3111 | SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, |
3112 | ISD::CondCode Cond, const SDLoc &DL, |
3113 | DAGCombinerInfo &DCI) const { |
3114 | // Match these patterns in any of their permutations: |
3115 | // (X & Y) == Y |
3116 | // (X & Y) != Y |
3117 | if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND) |
3118 | std::swap(N0, N1); |
3119 | |
3120 | EVT OpVT = N0.getValueType(); |
3121 | if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() || |
3122 | (Cond != ISD::SETEQ && Cond != ISD::SETNE)) |
3123 | return SDValue(); |
3124 | |
3125 | SDValue X, Y; |
3126 | if (N0.getOperand(0) == N1) { |
3127 | X = N0.getOperand(1); |
3128 | Y = N0.getOperand(0); |
3129 | } else if (N0.getOperand(1) == N1) { |
3130 | X = N0.getOperand(0); |
3131 | Y = N0.getOperand(1); |
3132 | } else { |
3133 | return SDValue(); |
3134 | } |
3135 | |
3136 | SelectionDAG &DAG = DCI.DAG; |
3137 | SDValue Zero = DAG.getConstant(0, DL, OpVT); |
3138 | if (DAG.isKnownToBeAPowerOfTwo(Y)) { |
3139 | // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. |
3140 | // Note that where Y is variable and is known to have at most one bit set |
3141 | // (for example, if it is Z & 1) we cannot do this; the expressions are not |
3142 | // equivalent when Y == 0. |
3143 |     assert(OpVT.isInteger());
3144 | Cond = ISD::getSetCCInverse(Cond, OpVT); |
3145 | if (DCI.isBeforeLegalizeOps() || |
3146 | isCondCodeLegal(Cond, N0.getSimpleValueType())) |
3147 | return DAG.getSetCC(DL, VT, N0, Zero, Cond); |
3148 | } else if (N0.hasOneUse() && hasAndNotCompare(Y)) { |
3149 | // If the target supports an 'and-not' or 'and-complement' logic operation, |
3150 | // try to use that to make a comparison operation more efficient. |
3151 | // But don't do this transform if the mask is a single bit because there are |
3152 | // more efficient ways to deal with that case (for example, 'bt' on x86 or |
3153 | // 'rlwinm' on PPC). |
3154 | |
3155 | // Bail out if the compare operand that we want to turn into a zero is |
3156 | // already a zero (otherwise, infinite loop). |
3157 | auto *YConst = dyn_cast<ConstantSDNode>(Y); |
3158 | if (YConst && YConst->isNullValue()) |
3159 | return SDValue(); |
3160 | |
3161 | // Transform this into: ~X & Y == 0. |
3162 | SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT); |
3163 | SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y); |
3164 | return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond); |
3165 | } |
3166 | |
3167 | return SDValue(); |
3168 | } |
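     | // Rough worked examples of the two branches above (constants assumed):
     | //   (x & 8) == 8  -->  (x & 8) != 0      ; Y = 8 is a known power of two
     | //   (x & y) == y  -->  (~x & y) == 0     ; target has and-not (e.g. ANDN/BIC)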
3169 | |
3170 | /// There are multiple IR patterns that could be checking whether certain |
3171 | /// truncation of a signed number would be lossy or not. The pattern that is
3172 | /// best at the IR level may not lower optimally. Thus, we want to unfold it.
3173 | /// We are looking for the following pattern: (KeptBits is a constant) |
3174 | /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) |
3175 | /// KeptBits won't be bitwidth(x); that will be constant-folded to true/false.
3176 | /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
3177 | /// We will unfold it into the natural trunc+sext pattern: |
3178 | /// ((%x << C) a>> C) dstcond %x |
3179 | /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x) |
3180 | SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( |
3181 | EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, |
3182 | const SDLoc &DL) const { |
3183 | // We must be comparing with a constant. |
3184 | ConstantSDNode *C1; |
3185 | if (!(C1 = dyn_cast<ConstantSDNode>(N1))) |
3186 | return SDValue(); |
3187 | |
3188 | // N0 should be: add %x, (1 << (KeptBits-1)) |
3189 | if (N0->getOpcode() != ISD::ADD) |
3190 | return SDValue(); |
3191 | |
3192 | // And we must be 'add'ing a constant. |
3193 | ConstantSDNode *C01; |
3194 | if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1)))) |
3195 | return SDValue(); |
3196 | |
3197 | SDValue X = N0->getOperand(0); |
3198 | EVT XVT = X.getValueType(); |
3199 | |
3200 | // Validate constants ... |
3201 | |
3202 | APInt I1 = C1->getAPIntValue(); |
3203 | |
3204 | ISD::CondCode NewCond; |
3205 | if (Cond == ISD::CondCode::SETULT) { |
3206 | NewCond = ISD::CondCode::SETEQ; |
3207 | } else if (Cond == ISD::CondCode::SETULE) { |
3208 | NewCond = ISD::CondCode::SETEQ; |
3209 | // But need to 'canonicalize' the constant. |
3210 | I1 += 1; |
3211 | } else if (Cond == ISD::CondCode::SETUGT) { |
3212 | NewCond = ISD::CondCode::SETNE; |
3213 | // But need to 'canonicalize' the constant. |
3214 | I1 += 1; |
3215 | } else if (Cond == ISD::CondCode::SETUGE) { |
3216 | NewCond = ISD::CondCode::SETNE; |
3217 | } else |
3218 | return SDValue(); |
3219 | |
3220 | APInt I01 = C01->getAPIntValue(); |
3221 | |
3222 | auto checkConstants = [&I1, &I01]() -> bool { |
3223 |     // Both must be powers of two, and the constant from the setcc is bigger.
3224 | return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2(); |
3225 | }; |
3226 | |
3227 | if (checkConstants()) { |
3228 | // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256 |
3229 | } else { |
3230 | // What if we invert constants? (and the target predicate) |
3231 | I1.negate(); |
3232 | I01.negate(); |
3233 |     assert(XVT.isInteger());
3234 | NewCond = getSetCCInverse(NewCond, XVT); |
3235 | if (!checkConstants()) |
3236 | return SDValue(); |
3237 | // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256 |
3238 | } |
3239 | |
3240 |   // They are powers of two, so which bit is set?
3241 | const unsigned KeptBits = I1.logBase2(); |
3242 | const unsigned KeptBitsMinusOne = I01.logBase2(); |
3243 | |
3244 | // Magic! |
3245 | if (KeptBits != (KeptBitsMinusOne + 1)) |
3246 | return SDValue(); |
3247 |   assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3248 | |
3249 | // We don't want to do this in every single case. |
3250 | SelectionDAG &DAG = DCI.DAG; |
3251 | if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck( |
3252 | XVT, KeptBits)) |
3253 | return SDValue(); |
3254 | |
3255 | const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits; |
3256 |   assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3257 | |
3258 | // Unfold into: ((%x << C) a>> C) cond %x |
3259 | // Where 'cond' will be either 'eq' or 'ne'. |
3260 | SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT); |
3261 | SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt); |
3262 | SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt); |
3263 | SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond); |
3264 | |
3265 | return T2; |
3266 | } |
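     | // A rough worked example, assuming i16 %x and KeptBits == 8 (so C == 8):
     | //   icmp ult i16 (add i16 %x, 128), 256
     | //     -->  icmp eq i16 (ashr (shl i16 %x, 8), 8), %x
     | // i.e. "does %x fit in 8 signed bits" becomes a trunc+sext round-trip
     | // compared against the original value.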
3267 | |
3268 | // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 |
3269 | SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( |
3270 | EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, |
3271 | DAGCombinerInfo &DCI, const SDLoc &DL) const { |
3272 |   assert(isConstOrConstSplat(N1C) &&
3273 |          isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
3274 |          "Should be a comparison with 0.");
3275 |   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3276 |          "Valid only for [in]equality comparisons.");
3277 | |
3278 | unsigned NewShiftOpcode; |
3279 | SDValue X, C, Y; |
3280 | |
3281 | SelectionDAG &DAG = DCI.DAG; |
3282 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
3283 | |
3284 | // Look for '(C l>>/<< Y)'. |
3285 | auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) { |
3286 | // The shift should be one-use. |
3287 | if (!V.hasOneUse()) |
3288 | return false; |
3289 | unsigned OldShiftOpcode = V.getOpcode(); |
3290 | switch (OldShiftOpcode) { |
3291 | case ISD::SHL: |
3292 | NewShiftOpcode = ISD::SRL; |
3293 | break; |
3294 | case ISD::SRL: |
3295 | NewShiftOpcode = ISD::SHL; |
3296 | break; |
3297 | default: |
3298 | return false; // must be a logical shift. |
3299 | } |
3300 | // We should be shifting a constant. |
3301 | // FIXME: best to use isConstantOrConstantVector(). |
3302 | C = V.getOperand(0); |
3303 | ConstantSDNode *CC = |
3304 | isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true); |
3305 | if (!CC) |
3306 | return false; |
3307 | Y = V.getOperand(1); |
3308 | |
3309 | ConstantSDNode *XC = |
3310 | isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true); |
3311 | return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
3312 | X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG); |
3313 | }; |
3314 | |
3315 |   // LHS of the comparison should be a one-use 'and'.
3316 | if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) |
3317 | return SDValue(); |
3318 | |
3319 | X = N0.getOperand(0); |
3320 | SDValue Mask = N0.getOperand(1); |
3321 | |
3322 | // 'and' is commutative! |
3323 | if (!Match(Mask)) { |
3324 | std::swap(X, Mask); |
3325 | if (!Match(Mask)) |
3326 | return SDValue(); |
3327 | } |
3328 | |
3329 | EVT VT = X.getValueType(); |
3330 | |
3331 | // Produce: |
3332 | // ((X 'OppositeShiftOpcode' Y) & C) Cond 0 |
3333 | SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y); |
3334 | SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C); |
3335 | SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond); |
3336 | return T2; |
3337 | } |
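     | // A rough worked example (constants assumed): for i32 %x and variable %y,
     | //   ((%x & (0x80000000 u>> %y)) != 0)  -->  (((%x << %y) & 0x80000000) != 0)
     | // hoisting the constant out of the variable shift so the 'and' mask
     | // becomes an immediate.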
3338 | |
3339 | /// Try to fold an equality comparison with a {add/sub/xor} binary operation as |
3340 | /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to |
3341 | /// handle the commuted versions of these patterns. |
3342 | SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, |
3343 | ISD::CondCode Cond, const SDLoc &DL, |
3344 | DAGCombinerInfo &DCI) const { |
3345 | unsigned BOpcode = N0.getOpcode(); |
3346 |   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3347 |          "Unexpected binop");
3348 |   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3349 | |
3350 | // (X + Y) == X --> Y == 0 |
3351 | // (X - Y) == X --> Y == 0 |
3352 | // (X ^ Y) == X --> Y == 0 |
3353 | SelectionDAG &DAG = DCI.DAG; |
3354 | EVT OpVT = N0.getValueType(); |
3355 | SDValue X = N0.getOperand(0); |
3356 | SDValue Y = N0.getOperand(1); |
3357 | if (X == N1) |
3358 | return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond); |
3359 | |
3360 | if (Y != N1) |
3361 | return SDValue(); |
3362 | |
3363 | // (X + Y) == Y --> X == 0 |
3364 | // (X ^ Y) == Y --> X == 0 |
3365 | if (BOpcode == ISD::ADD || BOpcode == ISD::XOR) |
3366 | return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond); |
3367 | |
3368 | // The shift would not be valid if the operands are boolean (i1). |
3369 | if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1) |
3370 | return SDValue(); |
3371 | |
3372 | // (X - Y) == Y --> X == Y << 1 |
3373 | EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(), |
3374 | !DCI.isBeforeLegalize()); |
3375 | SDValue One = DAG.getConstant(1, DL, ShiftVT); |
3376 | SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One); |
3377 | if (!DCI.isCalledByLegalizer()) |
3378 | DCI.AddToWorklist(YShl1.getNode()); |
3379 | return DAG.getSetCC(DL, VT, X, YShl1, Cond); |
3380 | } |
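     | // A rough worked example of the final fold, assuming i32 operands:
     | //   (%x - %y) == %y  -->  %x == (%y << 1)
     | // e.g. %x = 10, %y = 5: 10 - 5 == 5 and 10 == (5 << 1) both hold.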
3381 | |
3382 | static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, |
3383 | SDValue N0, const APInt &C1, |
3384 | ISD::CondCode Cond, const SDLoc &dl, |
3385 | SelectionDAG &DAG) { |
3386 | // Look through truncs that don't change the value of a ctpop. |
3387 | // FIXME: Add vector support? Need to be careful with setcc result type below. |
3388 | SDValue CTPOP = N0; |
3389 | if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() && |
3390 | N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits())) |
3391 | CTPOP = N0.getOperand(0); |
3392 | |
3393 | if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse()) |
3394 | return SDValue(); |
3395 | |
3396 | EVT CTVT = CTPOP.getValueType(); |
3397 | SDValue CTOp = CTPOP.getOperand(0); |
3398 | |
3399 | // If this is a vector CTPOP, keep the CTPOP if it is legal. |
3400 |   // TODO: Should we check if CTPOP is legal (or custom) for scalars?
3401 | if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT)) |
3402 | return SDValue(); |
3403 | |
3404 | // (ctpop x) u< 2 -> (x & x-1) == 0 |
3405 | // (ctpop x) u> 1 -> (x & x-1) != 0 |
3406 | if (Cond == ISD::SETULT || Cond == ISD::SETUGT) { |
3407 | unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond); |
3408 | if (C1.ugt(CostLimit + (Cond == ISD::SETULT))) |
3409 | return SDValue(); |
3410 | if (C1 == 0 && (Cond == ISD::SETULT)) |
3411 | return SDValue(); // This is handled elsewhere. |
3412 | |
3413 | unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT); |
3414 | |
3415 | SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); |
3416 | SDValue Result = CTOp; |
3417 | for (unsigned i = 0; i < Passes; i++) { |
3418 | SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne); |
3419 | Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add); |
3420 | } |
3421 | ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; |
3422 | return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC); |
3423 | } |
3424 | |
3425 | // If ctpop is not supported, expand a power-of-2 comparison based on it. |
3426 | if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) { |
3427 | // For scalars, keep CTPOP if it is legal or custom. |
3428 | if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT)) |
3429 | return SDValue(); |
3430 |     // This is based on X86's custom lowering for CTPOP, which produces more
3431 | // instructions than the expansion here. |
3432 | |
3433 | // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) |
3434 | // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) |
3435 | SDValue Zero = DAG.getConstant(0, dl, CTVT); |
3436 | SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); |
3437 |     assert(CTVT.isInteger());
3438 | ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); |
3439 | SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); |
3440 | SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); |
3441 | SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); |
3442 | SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); |
3443 | unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR; |
3444 | return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); |
3445 | } |
3446 | |
3447 | return SDValue(); |
3448 | } |
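     | // A minimal standalone sketch of the C1 == 1 expansion above on plain
     | // integers (illustrative only; this helper's name and presence are an
     | // editorial assumption, and uint64_t is assumed reachable through the
     | // existing includes):
     | static inline bool isPowerOfTwoSketch(uint64_t X) {
     |   // (ctpop x) == 1  <=>  x != 0 && (x & (x - 1)) == 0
     |   return X != 0 && (X & (X - 1)) == 0;
     | }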
3449 | |
3450 | /// Try to simplify a setcc built with the specified operands and cc. If it is |
3451 | /// unable to simplify it, return a null SDValue. |
3452 | SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, |
3453 | ISD::CondCode Cond, bool foldBooleans, |
3454 | DAGCombinerInfo &DCI, |
3455 | const SDLoc &dl) const { |
3456 | SelectionDAG &DAG = DCI.DAG; |
3457 | const DataLayout &Layout = DAG.getDataLayout(); |
3458 | EVT OpVT = N0.getValueType(); |
3459 | |
3460 | // Constant fold or commute setcc. |
3461 | if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl)) |
3462 | return Fold; |
3463 | |
3464 | // Ensure that the constant occurs on the RHS and fold constant comparisons. |
3465 |   // TODO: Handle non-splat vector constants. All-undef vectors cause trouble.
3466 | // FIXME: We can't yet fold constant scalable vector splats, so avoid an |
3467 | // infinite loop here when we encounter one. |
3468 | ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); |
3469 | if (isConstOrConstSplat(N0) && |
3470 | (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) && |
3471 | (DCI.isBeforeLegalizeOps() || |
3472 | isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) |
3473 | return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); |
3474 | |
3475 | // If we have a subtract with the same 2 non-constant operands as this setcc |
3476 | // -- but in reverse order -- then try to commute the operands of this setcc |
3477 | // to match. A matching pair of setcc (cmp) and sub may be combined into 1 |
3478 | // instruction on some targets. |
3479 | if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) && |
3480 | (DCI.isBeforeLegalizeOps() || |
3481 | isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) && |
3482 | DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) && |
3483 | !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1})) |
3484 | return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); |
3485 | |
3486 | if (auto *N1C = isConstOrConstSplat(N1)) { |
3487 | const APInt &C1 = N1C->getAPIntValue(); |
3488 | |
3489 | // Optimize some CTPOP cases. |
3490 | if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG)) |
3491 | return V; |
3492 | |
3493 | // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an |
3494 | // equality comparison, then we're just comparing whether X itself is |
3495 | // zero. |
3496 | if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && |
3497 | N0.getOperand(0).getOpcode() == ISD::CTLZ && |
3498 | isPowerOf2_32(N0.getScalarValueSizeInBits())) { |
3499 | if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) { |
3500 | if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && |
3501 | ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) { |
3502 | if ((C1 == 0) == (Cond == ISD::SETEQ)) { |
3503 | // (srl (ctlz x), 5) == 0 -> X != 0 |
3504 | // (srl (ctlz x), 5) != 1 -> X != 0 |
3505 | Cond = ISD::SETNE; |
3506 | } else { |
3507 | // (srl (ctlz x), 5) != 0 -> X == 0 |
3508 | // (srl (ctlz x), 5) == 1 -> X == 0 |
3509 | Cond = ISD::SETEQ; |
3510 | } |
3511 | SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); |
3512 | return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero, |
3513 | Cond); |
3514 | } |
3515 | } |
3516 | } |
3517 | } |
3518 | |
3519 | // FIXME: Support vectors. |
3520 | if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { |
3521 | const APInt &C1 = N1C->getAPIntValue(); |
3522 | |
3523 | // (zext x) == C --> x == (trunc C) |
3524 | // (sext x) == C --> x == (trunc C) |
3525 | if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && |
3526 | DCI.isBeforeLegalize() && N0->hasOneUse()) { |
3527 | unsigned MinBits = N0.getValueSizeInBits(); |
3528 | SDValue PreExt; |
3529 | bool Signed = false; |
3530 | if (N0->getOpcode() == ISD::ZERO_EXTEND) { |
3531 | // ZExt |
3532 | MinBits = N0->getOperand(0).getValueSizeInBits(); |
3533 | PreExt = N0->getOperand(0); |
3534 | } else if (N0->getOpcode() == ISD::AND) { |
3535 | // DAGCombine turns costly ZExts into ANDs |
3536 | if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) |
3537 | if ((C->getAPIntValue()+1).isPowerOf2()) { |
3538 | MinBits = C->getAPIntValue().countTrailingOnes(); |
3539 | PreExt = N0->getOperand(0); |
3540 | } |
3541 | } else if (N0->getOpcode() == ISD::SIGN_EXTEND) { |
3542 | // SExt |
3543 | MinBits = N0->getOperand(0).getValueSizeInBits(); |
3544 | PreExt = N0->getOperand(0); |
3545 | Signed = true; |
3546 | } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) { |
3547 | // ZEXTLOAD / SEXTLOAD |
3548 | if (LN0->getExtensionType() == ISD::ZEXTLOAD) { |
3549 | MinBits = LN0->getMemoryVT().getSizeInBits(); |
3550 | PreExt = N0; |
3551 | } else if (LN0->getExtensionType() == ISD::SEXTLOAD) { |
3552 | Signed = true; |
3553 | MinBits = LN0->getMemoryVT().getSizeInBits(); |
3554 | PreExt = N0; |
3555 | } |
3556 | } |
3557 | |
3558 |       // Figure out how many bits are needed to preserve this constant.
3559 | unsigned ReqdBits = Signed ? |
3560 | C1.getBitWidth() - C1.getNumSignBits() + 1 : |
3561 | C1.getActiveBits(); |
3562 | |
3563 | // Make sure we're not losing bits from the constant. |
3564 | if (MinBits > 0 && |
3565 | MinBits < C1.getBitWidth() && |
3566 | MinBits >= ReqdBits) { |
3567 | EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); |
3568 | if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { |
3569 | // Will get folded away. |
3570 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); |
3571 | if (MinBits == 1 && C1 == 1) |
3572 | // Invert the condition. |
3573 | return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1), |
3574 | Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); |
3575 | SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); |
3576 | return DAG.getSetCC(dl, VT, Trunc, C, Cond); |
3577 | } |
3578 | |
3579 | // If truncating the setcc operands is not desirable, we can still |
3580 | // simplify the expression in some cases: |
3581 | // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc) |
3582 | // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc)) |
3583 | // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc)) |
3584 | // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc) |
3585 | // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc)) |
3586 | // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc) |
3587 | SDValue TopSetCC = N0->getOperand(0); |
3588 | unsigned N0Opc = N0->getOpcode(); |
3589 | bool SExt = (N0Opc == ISD::SIGN_EXTEND); |
3590 | if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 && |
3591 | TopSetCC.getOpcode() == ISD::SETCC && |
3592 | (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) && |
3593 | (isConstFalseVal(N1C) || |
3594 | isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { |
3595 | |
3596 | bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) || |
3597 | (!N1C->isNullValue() && Cond == ISD::SETNE); |
3598 | |
3599 | if (!Inverse) |
3600 | return TopSetCC; |
3601 | |
3602 | ISD::CondCode InvCond = ISD::getSetCCInverse( |
3603 | cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(), |
3604 | TopSetCC.getOperand(0).getValueType()); |
3605 | return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), |
3606 | TopSetCC.getOperand(1), |
3607 | InvCond); |
3608 | } |
3609 | } |
3610 | } |
3611 | |
3612 | // If the LHS is '(and load, const)', the RHS is 0, the test is for |
3613 | // equality or unsigned, and all 1 bits of the const are in the same |
3614 | // partial word, see if we can shorten the load. |
3615 | if (DCI.isBeforeLegalize() && |
3616 | !ISD::isSignedIntSetCC(Cond) && |
3617 | N0.getOpcode() == ISD::AND && C1 == 0 && |
3618 | N0.getNode()->hasOneUse() && |
3619 | isa<LoadSDNode>(N0.getOperand(0)) && |
3620 | N0.getOperand(0).getNode()->hasOneUse() && |
3621 | isa<ConstantSDNode>(N0.getOperand(1))) { |
3622 | LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); |
3623 | APInt bestMask; |
3624 | unsigned bestWidth = 0, bestOffset = 0; |
3625 | if (Lod->isSimple() && Lod->isUnindexed()) { |
3626 | unsigned origWidth = N0.getValueSizeInBits(); |
3627 | unsigned maskWidth = origWidth; |
3628 |         // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
3629 | // 8 bits, but have to be careful... |
3630 | if (Lod->getExtensionType() != ISD::NON_EXTLOAD) |
3631 | origWidth = Lod->getMemoryVT().getSizeInBits(); |
3632 | const APInt &Mask = N0.getConstantOperandAPInt(1); |
3633 | for (unsigned width = origWidth / 2; width>=8; width /= 2) { |
3634 | APInt newMask = APInt::getLowBitsSet(maskWidth, width); |
3635 | for (unsigned offset=0; offset<origWidth/width; offset++) { |
3636 | if (Mask.isSubsetOf(newMask)) { |
3637 | if (Layout.isLittleEndian()) |
3638 | bestOffset = (uint64_t)offset * (width/8); |
3639 | else |
3640 | bestOffset = (origWidth/width - offset - 1) * (width/8); |
3641 | bestMask = Mask.lshr(offset * (width/8) * 8); |
3642 | bestWidth = width; |
3643 | break; |
3644 | } |
3645 | newMask <<= width; |
3646 | } |
3647 | } |
3648 | } |
3649 | if (bestWidth) { |
3650 | EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); |
3651 | if (newVT.isRound() && |
3652 | shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { |
3653 | SDValue Ptr = Lod->getBasePtr(); |
3654 | if (bestOffset != 0) |
3655 | Ptr = |
3656 | DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl); |
3657 | SDValue NewLoad = |
3658 | DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, |
3659 | Lod->getPointerInfo().getWithOffset(bestOffset), |
3660 | Lod->getOriginalAlign()); |
3661 | return DAG.getSetCC(dl, VT, |
3662 | DAG.getNode(ISD::AND, dl, newVT, NewLoad, |
3663 | DAG.getConstant(bestMask.trunc(bestWidth), |
3664 | dl, newVT)), |
3665 | DAG.getConstant(0LL, dl, newVT), Cond); |
3666 | } |
3667 | } |
3668 | } |
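     |     // A rough worked example of the narrowing above, assuming a
     |     // little-endian target:
     |     //   ((i32 (load %p)) & 0xFF00) == 0
     |     // becomes an i8 load at byte offset 1 compared against 0, since all
     |     // set bits of the mask fall inside one aligned 8-bit window.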
3669 | |
3670 | // If the LHS is a ZERO_EXTEND, perform the comparison on the input. |
3671 | if (N0.getOpcode() == ISD::ZERO_EXTEND) { |
3672 | unsigned InSize = N0.getOperand(0).getValueSizeInBits(); |
3673 | |
3674 | // If the comparison constant has bits in the upper part, the |
3675 | // zero-extended value could never match. |
3676 | if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), |
3677 | C1.getBitWidth() - InSize))) { |
3678 | switch (Cond) { |
3679 | case ISD::SETUGT: |
3680 | case ISD::SETUGE: |
3681 | case ISD::SETEQ: |
3682 | return DAG.getConstant(0, dl, VT); |
3683 | case ISD::SETULT: |
3684 | case ISD::SETULE: |
3685 | case ISD::SETNE: |
3686 | return DAG.getConstant(1, dl, VT); |
3687 | case ISD::SETGT: |
3688 | case ISD::SETGE: |
3689 | // True if the sign bit of C1 is set. |
3690 | return DAG.getConstant(C1.isNegative(), dl, VT); |
3691 | case ISD::SETLT: |
3692 | case ISD::SETLE: |
3693 | // True if the sign bit of C1 isn't set. |
3694 | return DAG.getConstant(C1.isNonNegative(), dl, VT); |
3695 | default: |
3696 | break; |
3697 | } |
3698 | } |
3699 | |
3700 | // Otherwise, we can perform the comparison with the low bits. |
3701 | switch (Cond) { |
3702 | case ISD::SETEQ: |
3703 | case ISD::SETNE: |
3704 | case ISD::SETUGT: |
3705 | case ISD::SETUGE: |
3706 | case ISD::SETULT: |
3707 | case ISD::SETULE: { |
3708 | EVT newVT = N0.getOperand(0).getValueType(); |
3709 | if (DCI.isBeforeLegalizeOps() || |
3710 | (isOperationLegal(ISD::SETCC, newVT) && |
3711 | isCondCodeLegal(Cond, newVT.getSimpleVT()))) { |
3712 | EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT); |
3713 | SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); |
3714 | |
3715 | SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), |
3716 | NewConst, Cond); |
3717 | return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType()); |
3718 | } |
3719 | break; |
3720 | } |
3721 | default: |
3722 |         break; // TODO: be more careful with signed comparisons
3723 | } |
3724 | } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && |
3725 | (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { |
3726 | EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); |
3727 | unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); |
3728 | EVT ExtDstTy = N0.getValueType(); |
3729 | unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); |
3730 | |
3731 | // If the constant doesn't fit into the number of bits for the source of |
3732 | // the sign extension, it is impossible for both sides to be equal. |
3733 | if (C1.getMinSignedBits() > ExtSrcTyBits) |
3734 | return DAG.getConstant(Cond == ISD::SETNE, dl, VT); |
3735 | |
3736 | SDValue ZextOp; |
3737 | EVT Op0Ty = N0.getOperand(0).getValueType(); |
3738 | if (Op0Ty == ExtSrcTy) { |
3739 | ZextOp = N0.getOperand(0); |
3740 | } else { |
3741 | APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); |
3742 | ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), |
3743 | DAG.getConstant(Imm, dl, Op0Ty)); |
3744 | } |
3745 | if (!DCI.isCalledByLegalizer()) |
3746 | DCI.AddToWorklist(ZextOp.getNode()); |
3747 | // Otherwise, make this a use of a zext. |
3748 | return DAG.getSetCC(dl, VT, ZextOp, |
3749 | DAG.getConstant(C1 & APInt::getLowBitsSet( |
3750 | ExtDstTyBits, |
3751 | ExtSrcTyBits), |
3752 | dl, ExtDstTy), |
3753 | Cond); |
3754 | } else if ((N1C->isNullValue() || N1C->isOne()) && |
3755 | (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { |
3756 | // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC |
3757 | if (N0.getOpcode() == ISD::SETCC && |
3758 | isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && |
3759 | (N0.getValueType() == MVT::i1 || |
3760 | getBooleanContents(N0.getOperand(0).getValueType()) == |
3761 | ZeroOrOneBooleanContent)) { |
3762 | bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); |
3763 | if (TrueWhenTrue) |
3764 | return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); |
3765 | // Invert the condition. |
3766 | ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); |
3767 | CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType()); |
3768 | if (DCI.isBeforeLegalizeOps() || |
3769 | isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) |
3770 | return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); |
3771 | } |
3772 | |
3773 | if ((N0.getOpcode() == ISD::XOR || |
3774 | (N0.getOpcode() == ISD::AND && |
3775 | N0.getOperand(0).getOpcode() == ISD::XOR && |
3776 | N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && |
3777 | isa<ConstantSDNode>(N0.getOperand(1)) && |
3778 | cast<ConstantSDNode>(N0.getOperand(1))->isOne()) { |
3779 | // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We |
3780 | // can only do this if the top bits are known zero. |
3781 | unsigned BitWidth = N0.getValueSizeInBits(); |
3782 | if (DAG.MaskedValueIsZero(N0, |
3783 | APInt::getHighBitsSet(BitWidth, |
3784 | BitWidth-1))) { |
3785 | // Okay, get the un-inverted input value. |
3786 | SDValue Val; |
3787 | if (N0.getOpcode() == ISD::XOR) { |
3788 | Val = N0.getOperand(0); |
3789 | } else { |
3790 |           assert(N0.getOpcode() == ISD::AND &&
3791 |                  N0.getOperand(0).getOpcode() == ISD::XOR);
3792 | // ((X^1)&1)^1 -> X & 1 |
3793 | Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), |
3794 | N0.getOperand(0).getOperand(0), |
3795 | N0.getOperand(1)); |
3796 | } |
3797 | |
3798 | return DAG.getSetCC(dl, VT, Val, N1, |
3799 | Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); |
3800 | } |
3801 | } else if (N1C->isOne()) { |
3802 | SDValue Op0 = N0; |
3803 | if (Op0.getOpcode() == ISD::TRUNCATE) |
3804 | Op0 = Op0.getOperand(0); |
3805 | |
3806 | if ((Op0.getOpcode() == ISD::XOR) && |
3807 | Op0.getOperand(0).getOpcode() == ISD::SETCC && |
3808 | Op0.getOperand(1).getOpcode() == ISD::SETCC) { |
3809 | SDValue XorLHS = Op0.getOperand(0); |
3810 | SDValue XorRHS = Op0.getOperand(1); |
3811 | // Ensure that the input setccs return an i1 type or 0/1 value. |
3812 | if (Op0.getValueType() == MVT::i1 || |
3813 | (getBooleanContents(XorLHS.getOperand(0).getValueType()) == |
3814 | ZeroOrOneBooleanContent && |
3815 | getBooleanContents(XorRHS.getOperand(0).getValueType()) == |
3816 | ZeroOrOneBooleanContent)) { |
3817 | // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) |
3818 | Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; |
3819 | return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond); |
3820 | } |
3821 | } |
3822 | if (Op0.getOpcode() == ISD::AND && |
3823 | isa<ConstantSDNode>(Op0.getOperand(1)) && |
3824 | cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) { |
3825 | // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. |
3826 | if (Op0.getValueType().bitsGT(VT)) |
3827 | Op0 = DAG.getNode(ISD::AND, dl, VT, |
3828 | DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), |
3829 | DAG.getConstant(1, dl, VT)); |
3830 | else if (Op0.getValueType().bitsLT(VT)) |
3831 | Op0 = DAG.getNode(ISD::AND, dl, VT, |
3832 | DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), |
3833 | DAG.getConstant(1, dl, VT)); |
3834 | |
3835 | return DAG.getSetCC(dl, VT, Op0, |
3836 | DAG.getConstant(0, dl, Op0.getValueType()), |
3837 | Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); |
3838 | } |
3839 | if (Op0.getOpcode() == ISD::AssertZext && |
3840 | cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1) |
3841 | return DAG.getSetCC(dl, VT, Op0, |
3842 | DAG.getConstant(0, dl, Op0.getValueType()), |
3843 | Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); |
3844 | } |
3845 | } |
3846 | |
3847 | // Given: |
3848 | // icmp eq/ne (urem %x, %y), 0 |
3849 | // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': |
3850 | // icmp eq/ne %x, 0 |
3851 | if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() && |
3852 | (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { |
3853 | KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0)); |
3854 | KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1)); |
3855 | if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2) |
3856 | return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond); |
3857 | } |
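     |     // A rough worked example (operand facts assumed): if %x = (and %t, 1)
     |     // and %y = (or %s, 6), then %x u< %y always holds and the urem is a
     |     // no-op, so
     |     //   icmp eq (urem %x, %y), 0  -->  icmp eq %x, 0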
3858 | |
3859 | if (SDValue V = |
3860 | optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) |
3861 | return V; |
3862 | } |
3863 | |
3864 | // These simplifications apply to splat vectors as well. |
3865 | // TODO: Handle more splat vector cases. |
3866 | if (auto *N1C = isConstOrConstSplat(N1)) { |
3867 | const APInt &C1 = N1C->getAPIntValue(); |
3868 | |
3869 | APInt MinVal, MaxVal; |
3870 | unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits(); |
3871 | if (ISD::isSignedIntSetCC(Cond)) { |
3872 | MinVal = APInt::getSignedMinValue(OperandBitSize); |
3873 | MaxVal = APInt::getSignedMaxValue(OperandBitSize); |
3874 | } else { |
3875 | MinVal = APInt::getMinValue(OperandBitSize); |
3876 | MaxVal = APInt::getMaxValue(OperandBitSize); |
3877 | } |
3878 | |
3879 | // Canonicalize GE/LE comparisons to use GT/LT comparisons. |
3880 | if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { |
3881 | // X >= MIN --> true |
3882 | if (C1 == MinVal) |
3883 | return DAG.getBoolConstant(true, dl, VT, OpVT); |
3884 | |
3885 | if (!VT.isVector()) { // TODO: Support this for vectors. |
3886 | // X >= C0 --> X > (C0 - 1) |
3887 | APInt C = C1 - 1; |
3888 | ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; |
3889 | if ((DCI.isBeforeLegalizeOps() || |
3890 | isCondCodeLegal(NewCC, VT.getSimpleVT())) && |
3891 | (!N1C->isOpaque() || (C.getBitWidth() <= 64 && |
3892 | isLegalICmpImmediate(C.getSExtValue())))) { |
3893 | return DAG.getSetCC(dl, VT, N0, |
3894 | DAG.getConstant(C, dl, N1.getValueType()), |
3895 | NewCC); |
3896 | } |
3897 | } |
3898 | } |
3899 | |
3900 | if (Cond == ISD::SETLE || Cond == ISD::SETULE) { |
3901 | // X <= MAX --> true |
3902 | if (C1 == MaxVal) |
3903 | return DAG.getBoolConstant(true, dl, VT, OpVT); |
3904 | |
3905 | // X <= C0 --> X < (C0 + 1) |
3906 | if (!VT.isVector()) { // TODO: Support this for vectors. |
3907 | APInt C = C1 + 1; |
3908 | ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; |
3909 | if ((DCI.isBeforeLegalizeOps() || |
3910 | isCondCodeLegal(NewCC, VT.getSimpleVT())) && |
3911 | (!N1C->isOpaque() || (C.getBitWidth() <= 64 && |
3912 | isLegalICmpImmediate(C.getSExtValue())))) { |
3913 | return DAG.getSetCC(dl, VT, N0, |
3914 | DAG.getConstant(C, dl, N1.getValueType()), |
3915 | NewCC); |
3916 | } |
3917 | } |
3918 | } |
3919 | |
3920 | if (Cond == ISD::SETLT || Cond == ISD::SETULT) { |
3921 | if (C1 == MinVal) |
3922 | return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false |
3923 | |
3924 | // TODO: Support this for vectors after legalize ops. |
3925 | if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { |
3926 | // Canonicalize setlt X, Max --> setne X, Max |
3927 | if (C1 == MaxVal) |
3928 | return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); |
3929 | |
3930 | // If we have setult X, 1, turn it into seteq X, 0 |
3931 | if (C1 == MinVal+1) |
3932 | return DAG.getSetCC(dl, VT, N0, |
3933 | DAG.getConstant(MinVal, dl, N0.getValueType()), |
3934 | ISD::SETEQ); |
3935 | } |
3936 | } |
3937 | |
3938 | if (Cond == ISD::SETGT || Cond == ISD::SETUGT) { |
3939 | if (C1 == MaxVal) |
3940 | return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false |
3941 | |
3942 | // TODO: Support this for vectors after legalize ops. |
3943 | if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { |
3944 | // Canonicalize setgt X, Min --> setne X, Min |
3945 | if (C1 == MinVal) |
3946 | return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); |
3947 | |
3948 | // If we have setugt X, Max-1, turn it into seteq X, Max |
3949 | if (C1 == MaxVal-1) |
3950 | return DAG.getSetCC(dl, VT, N0, |
3951 | DAG.getConstant(MaxVal, dl, N0.getValueType()), |
3952 | ISD::SETEQ); |
3953 | } |
3954 | } |
3955 | |
3956 | if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { |
3957 | // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 |
3958 | if (C1.isNullValue()) |
3959 | if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( |
3960 | VT, N0, N1, Cond, DCI, dl)) |
3961 | return CC; |
3962 | |
3963 | // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y). |
3964 | // For example, when high 32-bits of i64 X are known clear: |
3965 | // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 |
3966 | // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1 |
3967 | bool CmpZero = N1C->getAPIntValue().isNullValue(); |
3968 | bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue(); |
3969 | if ((CmpZero || CmpNegOne) && N0.hasOneUse()) { |
3970 | // Match or(lo,shl(hi,bw/2)) pattern. |
3971 | auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) { |
3972 | unsigned EltBits = V.getScalarValueSizeInBits(); |
3973 | if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0) |
3974 | return false; |
3975 | SDValue LHS = V.getOperand(0); |
3976 | SDValue RHS = V.getOperand(1); |
3977 | APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2); |
3978 |             // Unshifted element must have zero upper bits.
3979 | if (RHS.getOpcode() == ISD::SHL && |
3980 | isa<ConstantSDNode>(RHS.getOperand(1)) && |
3981 | RHS.getConstantOperandAPInt(1) == (EltBits / 2) && |
3982 | DAG.MaskedValueIsZero(LHS, HiBits)) { |
3983 | Lo = LHS; |
3984 | Hi = RHS.getOperand(0); |
3985 | return true; |
3986 | } |
3987 | if (LHS.getOpcode() == ISD::SHL && |
3988 | isa<ConstantSDNode>(LHS.getOperand(1)) && |
3989 | LHS.getConstantOperandAPInt(1) == (EltBits / 2) && |
3990 | DAG.MaskedValueIsZero(RHS, HiBits)) { |
3991 | Lo = RHS; |
3992 | Hi = LHS.getOperand(0); |
3993 | return true; |
3994 | } |
3995 | return false; |
3996 | }; |
3997 | |
3998 | auto MergeConcat = [&](SDValue Lo, SDValue Hi) { |
3999 | unsigned EltBits = N0.getScalarValueSizeInBits(); |
4000 | unsigned HalfBits = EltBits / 2; |
4001 | APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits); |
4002 | SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT); |
4003 | SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits); |
4004 | SDValue NewN0 = |
4005 | DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask); |
4006 | SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits; |
4007 | return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond); |
4008 | }; |
4009 | |
4010 | SDValue Lo, Hi; |
4011 | if (IsConcat(N0, Lo, Hi)) |
4012 | return MergeConcat(Lo, Hi); |
4013 | |
4014 | if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) { |
4015 | SDValue Lo0, Lo1, Hi0, Hi1; |
4016 | if (IsConcat(N0.getOperand(0), Lo0, Hi0) && |
4017 | IsConcat(N0.getOperand(1), Lo1, Hi1)) { |
4018 | return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1), |
4019 | DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1)); |
4020 | } |
4021 | } |
4022 | } |
4023 | } |
4024 | |
4025 | // If we have "setcc X, C0", check to see if we can shrink the immediate |
4026 | // by changing cc. |
4027 | // TODO: Support this for vectors after legalize ops. |
4028 | if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { |
4029 | // SETUGT X, SINTMAX -> SETLT X, 0 |
4030 | // SETUGE X, SINTMIN -> SETLT X, 0 |
4031 | if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) || |
4032 | (Cond == ISD::SETUGE && C1.isMinSignedValue())) |
4033 | return DAG.getSetCC(dl, VT, N0, |
4034 | DAG.getConstant(0, dl, N1.getValueType()), |
4035 | ISD::SETLT); |
4036 | |
4037 | // SETULT X, SINTMIN -> SETGT X, -1 |
4038 | // SETULE X, SINTMAX -> SETGT X, -1 |
4039 | if ((Cond == ISD::SETULT && C1.isMinSignedValue()) || |
4040 | (Cond == ISD::SETULE && C1.isMaxSignedValue())) |
4041 | return DAG.getSetCC(dl, VT, N0, |
4042 | DAG.getAllOnesConstant(dl, N1.getValueType()), |
4043 | ISD::SETGT); |
4044 | } |
4045 | } |
4046 | |
4047 | // Back to non-vector simplifications. |
4048 | // TODO: Can we do these for vector splats? |
4049 | if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { |
4050 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4051 | const APInt &C1 = N1C->getAPIntValue(); |
4052 | EVT ShValTy = N0.getValueType(); |
4053 | |
4054 | // Fold bit comparisons when we can. This will result in an |
4055 | // incorrect value when boolean false is negative one, unless |
4056 |     // the bitsize is 1, in which case the false value is the same
4057 | // in practice regardless of the representation. |
4058 | if ((VT.getSizeInBits() == 1 || |
4059 | getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) && |
4060 | (Cond == ISD::SETEQ || Cond == ISD::SETNE) && |
4061 | (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && |
4062 | N0.getOpcode() == ISD::AND) { |
4063 | if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { |
4064 | EVT ShiftTy = |
4065 | getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); |
4066 | if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 |
4067 | // Perform the xform if the AND RHS is a single bit. |
4068 | unsigned ShCt = AndRHS->getAPIntValue().logBase2(); |
4069 | if (AndRHS->getAPIntValue().isPowerOf2() && |
4070 | !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { |
4071 | return DAG.getNode(ISD::TRUNCATE, dl, VT, |
4072 | DAG.getNode(ISD::SRL, dl, ShValTy, N0, |
4073 | DAG.getConstant(ShCt, dl, ShiftTy))); |
4074 | } |
4075 | } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { |
4076 | // (X & 8) == 8 --> (X & 8) >> 3 |
4077 | // Perform the xform if C1 is a single bit. |
4078 | unsigned ShCt = C1.logBase2(); |
4079 | if (C1.isPowerOf2() && |
4080 | !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { |
4081 | return DAG.getNode(ISD::TRUNCATE, dl, VT, |
4082 | DAG.getNode(ISD::SRL, dl, ShValTy, N0, |
4083 | DAG.getConstant(ShCt, dl, ShiftTy))); |
4084 | } |
4085 | } |
4086 | } |
4087 | } |
4088 | |
4089 | if (C1.getMinSignedBits() <= 64 && |
4090 | !isLegalICmpImmediate(C1.getSExtValue())) { |
4091 | EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); |
4092 | // (X & -256) == 256 -> (X >> 8) == 1 |
4093 | if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && |
4094 | N0.getOpcode() == ISD::AND && N0.hasOneUse()) { |
4095 | if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { |
4096 | const APInt &AndRHSC = AndRHS->getAPIntValue(); |
4097 | if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { |
4098 | unsigned ShiftBits = AndRHSC.countTrailingZeros(); |
4099 | if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { |
4100 | SDValue Shift = |
4101 | DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0), |
4102 | DAG.getConstant(ShiftBits, dl, ShiftTy)); |
4103 | SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy); |
4104 | return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); |
4105 | } |
4106 | } |
4107 | } |
4108 | } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || |
4109 | Cond == ISD::SETULE || Cond == ISD::SETUGT) { |
4110 | bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT); |
4111 | // X < 0x100000000 -> (X >> 32) < 1 |
4112 | // X >= 0x100000000 -> (X >> 32) >= 1 |
4113 | // X <= 0x0ffffffff -> (X >> 32) < 1 |
4114 | // X > 0x0ffffffff -> (X >> 32) >= 1 |
4115 | unsigned ShiftBits; |
4116 | APInt NewC = C1; |
4117 | ISD::CondCode NewCond = Cond; |
4118 | if (AdjOne) { |
4119 | ShiftBits = C1.countTrailingOnes(); |
4120 | NewC = NewC + 1; |
4121 | NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; |
4122 | } else { |
4123 | ShiftBits = C1.countTrailingZeros(); |
4124 | } |
4125 | NewC.lshrInPlace(ShiftBits); |
4126 | if (ShiftBits && NewC.getMinSignedBits() <= 64 && |
4127 | isLegalICmpImmediate(NewC.getSExtValue()) && |
4128 | !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { |
4129 | SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0, |
4130 | DAG.getConstant(ShiftBits, dl, ShiftTy)); |
4131 | SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); |
4132 | return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); |
4133 | } |
4134 | } |
4135 | } |
4136 | } |
4137 | |
4138 | if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) { |
4139 | auto *CFP = cast<ConstantFPSDNode>(N1); |
4140 |     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
4141 | |
4142 | // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the |
4143 |     // constant if knowing that the operand is non-NaN is enough. We prefer to
4144 | // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to |
4145 | // materialize 0.0. |
4146 | if (Cond == ISD::SETO || Cond == ISD::SETUO) |
4147 | return DAG.getSetCC(dl, VT, N0, N0, Cond); |
4148 | |
4149 | // setcc (fneg x), C -> setcc swap(pred) x, -C |
4150 | if (N0.getOpcode() == ISD::FNEG) { |
4151 | ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond); |
4152 | if (DCI.isBeforeLegalizeOps() || |
4153 | isCondCodeLegal(SwapCond, N0.getSimpleValueType())) { |
4154 | SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1); |
4155 | return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond); |
4156 | } |
4157 | } |
4158 | |
4159 | // If the condition is not legal, see if we can find an equivalent one |
4160 | // which is legal. |
4161 | if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) { |
4162 | // If the comparison was an awkward floating-point == or != and one of |
4163 | // the comparison operands is infinity or negative infinity, convert the |
4164 | // condition to a less-awkward <= or >=. |
4165 | if (CFP->getValueAPF().isInfinity()) { |
4166 | bool IsNegInf = CFP->getValueAPF().isNegative(); |
4167 | ISD::CondCode NewCond = ISD::SETCC_INVALID; |
4168 | switch (Cond) { |
4169 | case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break; |
4170 | case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break; |
4171 | case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break; |
4172 | case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break; |
4173 | default: break; |
4174 | } |
4175 | if (NewCond != ISD::SETCC_INVALID && |
4176 | isCondCodeLegal(NewCond, N0.getSimpleValueType())) |
4177 | return DAG.getSetCC(dl, VT, N0, N1, NewCond); |
4178 | } |
4179 | } |
4180 | } |
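     |   // A rough worked example of the infinity rewrite above: if SETOEQ is
     |   // not legal, 'x oeq +inf' can become 'x oge +inf' (nothing compares
     |   // greater than +inf), and symmetrically 'x oeq -inf' becomes
     |   // 'x ole -inf'.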
4181 | |
4182 | if (N0 == N1) { |
4183 | // The sext(setcc()) => setcc() optimization relies on the appropriate |
4184 | // constant being emitted. |
4185 |     assert(!N0.getValueType().isInteger() &&
4186 |            "Integer types should be handled by FoldSetCC");
4187 | |
4188 | bool EqTrue = ISD::isTrueWhenEqual(Cond); |
4189 | unsigned UOF = ISD::getUnorderedFlavor(Cond); |
4190 | if (UOF == 2) // FP operators that are undefined on NaNs. |
4191 | return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); |
4192 | if (UOF == unsigned(EqTrue)) |
4193 | return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); |
4194 | // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO |
4195 | // if it is not already. |
4196 | ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; |
4197 | if (NewCond != Cond && |
4198 | (DCI.isBeforeLegalizeOps() || |
4199 | isCondCodeLegal(NewCond, N0.getSimpleValueType()))) |
4200 | return DAG.getSetCC(dl, VT, N0, N1, NewCond); |
4201 | } |
4202 | |
4203 | if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && |
4204 | N0.getValueType().isInteger()) { |
4205 | if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB || |
4206 | N0.getOpcode() == ISD::XOR) { |
4207 | // Simplify (X+Y) == (X+Z) --> Y == Z |
4208 | if (N0.getOpcode() == N1.getOpcode()) { |
4209 | if (N0.getOperand(0) == N1.getOperand(0)) |
4210 | return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); |
4211 | if (N0.getOperand(1) == N1.getOperand(1)) |
4212 | return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); |
4213 | if (isCommutativeBinOp(N0.getOpcode())) { |
4214 | // If X op Y == Y op X, try other combinations. |
4215 | if (N0.getOperand(0) == N1.getOperand(1)) |
4216 | return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), |
4217 | Cond); |
4218 | if (N0.getOperand(1) == N1.getOperand(0)) |
4219 | return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), |
4220 | Cond); |
4221 | } |
4222 | } |
4223 | |
4224 | // If RHS is a legal immediate value for a compare instruction, we need |
4225 | // to be careful about increasing register pressure needlessly. |
4226 | bool LegalRHSImm = false; |
4227 | |
4228 | if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) { |
4229 | if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { |
4230 | // Turn (X+C1) == C2 --> X == C2-C1 |
4231 | if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { |
4232 | return DAG.getSetCC(dl, VT, N0.getOperand(0), |
4233 | DAG.getConstant(RHSC->getAPIntValue()- |
4234 | LHSR->getAPIntValue(), |
4235 | dl, N0.getValueType()), Cond); |
4236 | } |
4237 | |
4238 | // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. |
4239 | if (N0.getOpcode() == ISD::XOR) |
4240 | // If we know that all of the inverted bits are zero, don't bother |
4241 | // performing the inversion. |
4242 | if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) |
4243 | return |
4244 | DAG.getSetCC(dl, VT, N0.getOperand(0), |
4245 | DAG.getConstant(LHSR->getAPIntValue() ^ |
4246 | RHSC->getAPIntValue(), |
4247 | dl, N0.getValueType()), |
4248 | Cond); |
4249 | } |
4250 | |
4251 | // Turn (C1-X) == C2 --> X == C1-C2 |
4252 | if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { |
4253 | if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { |
4254 | return |
4255 | DAG.getSetCC(dl, VT, N0.getOperand(1), |
4256 | DAG.getConstant(SUBC->getAPIntValue() - |
4257 | RHSC->getAPIntValue(), |
4258 | dl, N0.getValueType()), |
4259 | Cond); |
4260 | } |
4261 | } |
4262 | |
4263 | // Could RHSC fold directly into a compare? |
4264 | if (RHSC->getValueType(0).getSizeInBits() <= 64) |
4265 | LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); |
4266 | } |
4267 | |
4268 | // (X+Y) == X --> Y == 0 and similar folds. |
4269 | // Don't do this if X is an immediate that can fold into a cmp |
4270 | // instruction and X+Y has other uses. It could be an induction variable |
4271 | // chain, and the transform would increase register pressure. |
4272 | if (!LegalRHSImm || N0.hasOneUse()) |
4273 | if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI)) |
4274 | return V; |
4275 | } |
4276 | |
4277 | if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || |
4278 | N1.getOpcode() == ISD::XOR) |
4279 | if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI)) |
4280 | return V; |
4281 | |
4282 | if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI)) |
4283 | return V; |
4284 | } |
4285 | |
4286 | // Fold remainder of division by a constant. |
4287 | if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) && |
4288 | N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { |
4289 | AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); |
4290 | |
4291 | // When division is cheap or optimizing for minimum size, |
4292 | // fall through to DIVREM creation by skipping this fold. |
4293 | if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { |
4294 | if (N0.getOpcode() == ISD::UREM) { |
4295 | if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) |
4296 | return Folded; |
4297 | } else if (N0.getOpcode() == ISD::SREM) { |
4298 | if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl)) |
4299 | return Folded; |
4300 | } |
4301 | } |
4302 | } |
4303 | |
4304 | // Fold away ALL boolean setcc's. |
4305 | if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { |
4306 | SDValue Temp; |
4307 | switch (Cond) { |
4308 | default: llvm_unreachable("Unknown integer setcc!");
4309 | case ISD::SETEQ: // X == Y -> ~(X^Y) |
4310 | Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); |
4311 | N0 = DAG.getNOT(dl, Temp, OpVT); |
4312 | if (!DCI.isCalledByLegalizer()) |
4313 | DCI.AddToWorklist(Temp.getNode()); |
4314 | break; |
4315 | case ISD::SETNE: // X != Y --> (X^Y) |
4316 | N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); |
4317 | break; |
4318 | case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y |
4319 | case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y |
4320 | Temp = DAG.getNOT(dl, N0, OpVT); |
4321 | N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp); |
4322 | if (!DCI.isCalledByLegalizer()) |
4323 | DCI.AddToWorklist(Temp.getNode()); |
4324 | break; |
4325 | case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X |
4326 | case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X |
4327 | Temp = DAG.getNOT(dl, N1, OpVT); |
4328 | N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp); |
4329 | if (!DCI.isCalledByLegalizer()) |
4330 | DCI.AddToWorklist(Temp.getNode()); |
4331 | break; |
4332 | case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y |
4333 | case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y |
4334 | Temp = DAG.getNOT(dl, N0, OpVT); |
4335 | N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp); |
4336 | if (!DCI.isCalledByLegalizer()) |
4337 | DCI.AddToWorklist(Temp.getNode()); |
4338 | break; |
4339 | case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X |
4340 | case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X |
4341 | Temp = DAG.getNOT(dl, N1, OpVT); |
4342 | N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp); |
4343 | break; |
4344 | } |
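// Spot-check of the table above (illustrative, i1 semantics): x <u y holds
// only for x = 0, y = 1, which is exactly ~x & y; likewise x >=u y fails
// only for x = 0, y = 1, which is exactly ~y | x.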
4345 | if (VT.getScalarType() != MVT::i1) { |
4346 | if (!DCI.isCalledByLegalizer()) |
4347 | DCI.AddToWorklist(N0.getNode()); |
4348 | // FIXME: If running after legalize, we probably can't do this. |
4349 | ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT)); |
4350 | N0 = DAG.getNode(ExtendCode, dl, VT, N0); |
4351 | } |
4352 | return N0; |
4353 | } |
4354 | |
4355 | // Could not fold it. |
4356 | return SDValue(); |
4357 | } |
4358 | |
4359 | /// Returns true (and the GlobalValue and the offset) if the node is a |
4360 | /// GlobalAddress + offset. |
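/// For example (illustrative): a node of the form (add (add GA, 8), 4)
/// resolves through the recursive ADD case below, returning the
/// GlobalValue with Offset accumulated to 12.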
4361 | bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA, |
4362 | int64_t &Offset) const { |
4363 | |
4364 | SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode(); |
4365 | |
4366 | if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) { |
4367 | GA = GASD->getGlobal(); |
4368 | Offset += GASD->getOffset(); |
4369 | return true; |
4370 | } |
4371 | |
4372 | if (N->getOpcode() == ISD::ADD) { |
4373 | SDValue N1 = N->getOperand(0); |
4374 | SDValue N2 = N->getOperand(1); |
4375 | if (isGAPlusOffset(N1.getNode(), GA, Offset)) { |
4376 | if (auto *V = dyn_cast<ConstantSDNode>(N2)) { |
4377 | Offset += V->getSExtValue(); |
4378 | return true; |
4379 | } |
4380 | } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { |
4381 | if (auto *V = dyn_cast<ConstantSDNode>(N1)) { |
4382 | Offset += V->getSExtValue(); |
4383 | return true; |
4384 | } |
4385 | } |
4386 | } |
4387 | |
4388 | return false; |
4389 | } |
4390 | |
4391 | SDValue TargetLowering::PerformDAGCombine(SDNode *N, |
4392 | DAGCombinerInfo &DCI) const { |
4393 | // Default implementation: no optimization. |
4394 | return SDValue(); |
4395 | } |
4396 | |
4397 | //===----------------------------------------------------------------------===// |
4398 | // Inline Assembler Implementation Methods |
4399 | //===----------------------------------------------------------------------===// |
4400 | |
4401 | TargetLowering::ConstraintType |
4402 | TargetLowering::getConstraintType(StringRef Constraint) const { |
4403 | unsigned S = Constraint.size(); |
4404 | |
4405 | if (S == 1) { |
4406 | switch (Constraint[0]) { |
4407 | default: break; |
4408 | case 'r': |
4409 | return C_RegisterClass; |
4410 | case 'm': // memory |
4411 | case 'o': // offsetable |
4412 | case 'V': // not offsetable |
4413 | return C_Memory; |
4414 | case 'n': // Simple Integer |
4415 | case 'E': // Floating Point Constant |
4416 | case 'F': // Floating Point Constant |
4417 | return C_Immediate; |
4418 | case 'i': // Simple Integer or Relocatable Constant |
4419 | case 's': // Relocatable Constant |
4420 | case 'p': // Address. |
4421 | case 'X': // Allow ANY value. |
4422 | case 'I': // Target registers. |
4423 | case 'J': |
4424 | case 'K': |
4425 | case 'L': |
4426 | case 'M': |
4427 | case 'N': |
4428 | case 'O': |
4429 | case 'P': |
4430 | case '<': |
4431 | case '>': |
4432 | return C_Other; |
4433 | } |
4434 | } |
4435 | |
4436 | if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') { |
4437 | if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}" |
4438 | return C_Memory; |
4439 | return C_Register; |
4440 | } |
4441 | return C_Unknown; |
4442 | } |
4443 | |
4444 | /// Try to replace an X constraint, which matches anything, with another that |
4445 | /// has more specific requirements based on the type of the corresponding |
4446 | /// operand. |
4447 | const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const { |
4448 | if (ConstraintVT.isInteger()) |
4449 | return "r"; |
4450 | if (ConstraintVT.isFloatingPoint()) |
4451 | return "f"; // works for many targets |
4452 | return nullptr; |
4453 | } |
4454 | |
4455 | SDValue TargetLowering::LowerAsmOutputForConstraint( |
4456 | SDValue &Chain, SDValue &Flag, const SDLoc &DL, |
4457 | const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { |
4458 | return SDValue(); |
4459 | } |
4460 | |
4461 | /// Lower the specified operand into the Ops vector. |
4462 | /// If it is invalid, don't add anything to Ops. |
4463 | void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, |
4464 | std::string &Constraint, |
4465 | std::vector<SDValue> &Ops, |
4466 | SelectionDAG &DAG) const { |
4467 | |
4468 | if (Constraint.length() > 1) return; |
4469 | |
4470 | char ConstraintLetter = Constraint[0]; |
4471 | switch (ConstraintLetter) { |
4472 | default: break; |
4473 | case 'X': // Allows any operand; labels (basic block) use this. |
4474 | if (Op.getOpcode() == ISD::BasicBlock || |
4475 | Op.getOpcode() == ISD::TargetBlockAddress) { |
4476 | Ops.push_back(Op); |
4477 | return; |
4478 | } |
4479 | LLVM_FALLTHROUGH;
4480 | case 'i': // Simple Integer or Relocatable Constant |
4481 | case 'n': // Simple Integer |
4482 | case 's': { // Relocatable Constant |
4483 | |
4484 | GlobalAddressSDNode *GA; |
4485 | ConstantSDNode *C; |
4486 | BlockAddressSDNode *BA; |
4487 | uint64_t Offset = 0; |
4488 | |
4489 | // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C), |
4490 | // etc., since getelementptr is variadic. We can't use
4491 | // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible |
4492 | // while in this case the GA may be furthest from the root node which is |
4493 | // likely an ISD::ADD. |
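// For instance (sketch): given an operand built as ((GA + 16) + 8), each
// iteration below peels one constant, accumulating Offset = 24, until Op
// is the bare GlobalAddressSDNode handled by the first branch.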
4494 | while (1) { |
4495 | if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') { |
4496 | Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), |
4497 | GA->getValueType(0), |
4498 | Offset + GA->getOffset())); |
4499 | return; |
4500 | } else if ((C = dyn_cast<ConstantSDNode>(Op)) && |
4501 | ConstraintLetter != 's') { |
4502 | // gcc prints these as sign extended. Sign extend value to 64 bits |
4503 | // now; without this it would get ZExt'd later in |
4504 | // ScheduleDAGSDNodes::EmitNode, which is very generic. |
4505 | bool IsBool = C->getConstantIntValue()->getBitWidth() == 1; |
4506 | BooleanContent BCont = getBooleanContents(MVT::i64); |
4507 | ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont) |
4508 | : ISD::SIGN_EXTEND; |
4509 | int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() |
4510 | : C->getSExtValue(); |
4511 | Ops.push_back(DAG.getTargetConstant(Offset + ExtVal, |
4512 | SDLoc(C), MVT::i64)); |
4513 | return; |
4514 | } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && |
4515 | ConstraintLetter != 'n') { |
4516 | Ops.push_back(DAG.getTargetBlockAddress( |
4517 | BA->getBlockAddress(), BA->getValueType(0), |
4518 | Offset + BA->getOffset(), BA->getTargetFlags())); |
4519 | return; |
4520 | } else { |
4521 | const unsigned OpCode = Op.getOpcode(); |
4522 | if (OpCode == ISD::ADD || OpCode == ISD::SUB) { |
4523 | if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0)))) |
4524 | Op = Op.getOperand(1); |
4525 | // Subtraction is not commutative. |
4526 | else if (OpCode == ISD::ADD && |
4527 | (C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))) |
4528 | Op = Op.getOperand(0); |
4529 | else |
4530 | return; |
4531 | Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue(); |
4532 | continue; |
4533 | } |
4534 | } |
4535 | return; |
4536 | } |
4537 | break; |
4538 | } |
4539 | } |
4540 | } |
4541 | |
4542 | std::pair<unsigned, const TargetRegisterClass *> |
4543 | TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, |
4544 | StringRef Constraint, |
4545 | MVT VT) const { |
4546 | if (Constraint.empty() || Constraint[0] != '{') |
4547 | return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr)); |
4548 | assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4549 | |
4550 | // Remove the braces from around the name. |
4551 | StringRef RegName(Constraint.data() + 1, Constraint.size() - 2); |
4552 | |
4553 | std::pair<unsigned, const TargetRegisterClass *> R = |
4554 | std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr)); |
4555 | |
4556 | // Figure out which register class contains this reg. |
4557 | for (const TargetRegisterClass *RC : RI->regclasses()) { |
4558 | // If none of the value types for this register class are valid, we |
4559 | // can't use it. For example, 64-bit reg classes on 32-bit targets. |
4560 | if (!isLegalRC(*RI, *RC)) |
4561 | continue; |
4562 | |
4563 | for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); |
4564 | I != E; ++I) { |
4565 | if (RegName.equals_lower(RI->getRegAsmName(*I))) { |
4566 | std::pair<unsigned, const TargetRegisterClass *> S = |
4567 | std::make_pair(*I, RC); |
4568 | |
4569 | // If this register class has the requested value type, return it, |
4570 | // otherwise keep searching and return the first class found |
4571 | // if no other is found which explicitly has the requested type. |
4572 | if (RI->isTypeLegalForClass(*RC, VT)) |
4573 | return S; |
4574 | if (!R.second) |
4575 | R = S; |
4576 | } |
4577 | } |
4578 | } |
4579 | |
4580 | return R; |
4581 | } |
4582 | |
4583 | //===----------------------------------------------------------------------===// |
4584 | // Constraint Selection. |
4585 | |
4586 | /// Return true if this is an input operand that is a matching constraint like
4587 | /// "4". |
4588 | bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { |
4589 | assert(!ConstraintCode.empty() && "No known constraint!");
4590 | return isdigit(static_cast<unsigned char>(ConstraintCode[0])); |
4591 | } |
4592 | |
4593 | /// If this is an input matching constraint, this method returns the output |
4594 | /// operand it matches. |
4595 | unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { |
4596 | assert(!ConstraintCode.empty() && "No known constraint!");
4597 | return atoi(ConstraintCode.c_str()); |
4598 | } |
4599 | |
4600 | /// Split up the constraint string from the inline assembly value into the |
4601 | /// specific constraints and their prefixes, and also tie in the associated |
4602 | /// operand values. |
4603 | /// If this returns an empty vector, and if the constraint string itself |
4604 | /// isn't empty, there was an error parsing. |
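/// As an illustration (hypothetical IR, not from the source): for
///   call i32 asm "add $0, $1, $2", "=r,r,r"(i32 %a, i32 %b)
/// this returns three AsmOperandInfo entries: one output describing the
/// call result and two inputs consuming %a and %b.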
4605 | TargetLowering::AsmOperandInfoVector |
4606 | TargetLowering::ParseConstraints(const DataLayout &DL, |
4607 | const TargetRegisterInfo *TRI, |
4608 | const CallBase &Call) const { |
4609 | /// Information about all of the constraints. |
4610 | AsmOperandInfoVector ConstraintOperands; |
4611 | const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); |
4612 | unsigned maCount = 0; // Largest number of multiple alternative constraints. |
4613 | |
4614 | // Do a prepass over the constraints, canonicalizing them, and building up the |
4615 | // ConstraintOperands list. |
4616 | unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. |
4617 | unsigned ResNo = 0; // ResNo - The result number of the next output. |
4618 | |
4619 | for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { |
4620 | ConstraintOperands.emplace_back(std::move(CI)); |
4621 | AsmOperandInfo &OpInfo = ConstraintOperands.back(); |
4622 | |
4623 | // Update multiple alternative constraint count. |
4624 | if (OpInfo.multipleAlternatives.size() > maCount) |
4625 | maCount = OpInfo.multipleAlternatives.size(); |
4626 | |
4627 | OpInfo.ConstraintVT = MVT::Other; |
4628 | |
4629 | // Compute the value type for each operand. |
4630 | switch (OpInfo.Type) { |
4631 | case InlineAsm::isOutput: |
4632 | // Indirect outputs just consume an argument. |
4633 | if (OpInfo.isIndirect) { |
4634 | OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); |
4635 | break; |
4636 | } |
4637 | |
4638 | // The return value of the call is this value. As such, there is no |
4639 | // corresponding argument. |
4640 | assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
4641 | if (StructType *STy = dyn_cast<StructType>(Call.getType())) { |
4642 | OpInfo.ConstraintVT = |
4643 | getSimpleValueType(DL, STy->getElementType(ResNo)); |
4644 | } else { |
4645 | assert(ResNo == 0 && "Asm only has one result!");
4646 | OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType()); |
4647 | } |
4648 | ++ResNo; |
4649 | break; |
4650 | case InlineAsm::isInput: |
4651 | OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); |
4652 | break; |
4653 | case InlineAsm::isClobber: |
4654 | // Nothing to do. |
4655 | break; |
4656 | } |
4657 | |
4658 | if (OpInfo.CallOperandVal) { |
4659 | llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); |
4660 | if (OpInfo.isIndirect) { |
4661 | llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); |
4662 | if (!PtrTy) |
4663 | report_fatal_error("Indirect operand for inline asm not a pointer!"); |
4664 | OpTy = PtrTy->getElementType(); |
4665 | } |
4666 | |
4667 | // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
4668 | if (StructType *STy = dyn_cast<StructType>(OpTy)) |
4669 | if (STy->getNumElements() == 1) |
4670 | OpTy = STy->getElementType(0); |
4671 | |
4672 | // If OpTy is not a single value, it may be a struct/union that we |
4673 | // can tile with integers. |
4674 | if (!OpTy->isSingleValueType() && OpTy->isSized()) { |
4675 | unsigned BitSize = DL.getTypeSizeInBits(OpTy); |
4676 | switch (BitSize) { |
4677 | default: break; |
4678 | case 1: |
4679 | case 8: |
4680 | case 16: |
4681 | case 32: |
4682 | case 64: |
4683 | case 128: |
4684 | OpInfo.ConstraintVT = |
4685 | MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true); |
4686 | break; |
4687 | } |
4688 | } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { |
4689 | unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace()); |
4690 | OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize); |
4691 | } else { |
4692 | OpInfo.ConstraintVT = MVT::getVT(OpTy, true); |
4693 | } |
4694 | } |
4695 | } |
4696 | |
4697 | // If we have multiple alternative constraints, select the best alternative. |
4698 | if (!ConstraintOperands.empty()) { |
4699 | if (maCount) { |
4700 | unsigned bestMAIndex = 0; |
4701 | int bestWeight = -1; |
4702 | // weight: -1 = invalid match, and 0 = so-so match to 5 = good match. |
4703 | int weight = -1; |
4704 | unsigned maIndex; |
4705 | // Compute the sums of the weights for each alternative, keeping track |
4706 | // of the best (highest weight) one so far. |
4707 | for (maIndex = 0; maIndex < maCount; ++maIndex) { |
4708 | int weightSum = 0; |
4709 | for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); |
4710 | cIndex != eIndex; ++cIndex) { |
4711 | AsmOperandInfo &OpInfo = ConstraintOperands[cIndex]; |
4712 | if (OpInfo.Type == InlineAsm::isClobber) |
4713 | continue; |
4714 | |
4715 | // If this is an output operand with a matching input operand, |
4716 | // look up the matching input. If their types mismatch, e.g. one |
4717 | // is an integer, the other is floating point, or their sizes are |
4718 | // different, flag it as maCantMatch.
4719 | if (OpInfo.hasMatchingInput()) { |
4720 | AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; |
4721 | if (OpInfo.ConstraintVT != Input.ConstraintVT) { |
4722 | if ((OpInfo.ConstraintVT.isInteger() != |
4723 | Input.ConstraintVT.isInteger()) || |
4724 | (OpInfo.ConstraintVT.getSizeInBits() != |
4725 | Input.ConstraintVT.getSizeInBits())) { |
4726 | weightSum = -1; // Can't match. |
4727 | break; |
4728 | } |
4729 | } |
4730 | } |
4731 | weight = getMultipleConstraintMatchWeight(OpInfo, maIndex); |
4732 | if (weight == -1) { |
4733 | weightSum = -1; |
4734 | break; |
4735 | } |
4736 | weightSum += weight; |
4737 | } |
4738 | // Update best. |
4739 | if (weightSum > bestWeight) { |
4740 | bestWeight = weightSum; |
4741 | bestMAIndex = maIndex; |
4742 | } |
4743 | } |
4744 | |
4745 | // Now select chosen alternative in each constraint. |
4746 | for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); |
4747 | cIndex != eIndex; ++cIndex) { |
4748 | AsmOperandInfo &cInfo = ConstraintOperands[cIndex]; |
4749 | if (cInfo.Type == InlineAsm::isClobber) |
4750 | continue; |
4751 | cInfo.selectAlternative(bestMAIndex); |
4752 | } |
4753 | } |
4754 | } |
4755 | |
4756 | // Check and hook up tied operands, choose constraint code to use. |
4757 | for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); |
4758 | cIndex != eIndex; ++cIndex) { |
4759 | AsmOperandInfo &OpInfo = ConstraintOperands[cIndex]; |
4760 | |
4761 | // If this is an output operand with a matching input operand, look up the |
4762 | // matching input. If their types mismatch, e.g. one is an integer, the |
4763 | // other is floating point, or their sizes are different, flag it as an |
4764 | // error. |
4765 | if (OpInfo.hasMatchingInput()) { |
4766 | AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; |
4767 | |
4768 | if (OpInfo.ConstraintVT != Input.ConstraintVT) { |
4769 | std::pair<unsigned, const TargetRegisterClass *> MatchRC = |
4770 | getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, |
4771 | OpInfo.ConstraintVT); |
4772 | std::pair<unsigned, const TargetRegisterClass *> InputRC = |
4773 | getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, |
4774 | Input.ConstraintVT); |
4775 | if ((OpInfo.ConstraintVT.isInteger() != |
4776 | Input.ConstraintVT.isInteger()) || |
4777 | (MatchRC.second != InputRC.second)) { |
4778 | report_fatal_error("Unsupported asm: input constraint" |
4779 | " with a matching output constraint of" |
4780 | " incompatible type!"); |
4781 | } |
4782 | } |
4783 | } |
4784 | } |
4785 | |
4786 | return ConstraintOperands; |
4787 | } |
4788 | |
4789 | /// Return an integer indicating how general CT is. |
4790 | static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { |
4791 | switch (CT) { |
4792 | case TargetLowering::C_Immediate: |
4793 | case TargetLowering::C_Other: |
4794 | case TargetLowering::C_Unknown: |
4795 | return 0; |
4796 | case TargetLowering::C_Register: |
4797 | return 1; |
4798 | case TargetLowering::C_RegisterClass: |
4799 | return 2; |
4800 | case TargetLowering::C_Memory: |
4801 | return 3; |
4802 | } |
4803 | llvm_unreachable("Invalid constraint type");
4804 | } |
4805 | |
4806 | /// Examine constraint type and operand type and determine a weight value. |
4807 | /// This object must already have been set up with the operand type |
4808 | /// and the current alternative constraint selected. |
4809 | TargetLowering::ConstraintWeight |
4810 | TargetLowering::getMultipleConstraintMatchWeight( |
4811 | AsmOperandInfo &info, int maIndex) const { |
4812 | InlineAsm::ConstraintCodeVector *rCodes; |
4813 | if (maIndex >= (int)info.multipleAlternatives.size()) |
4814 | rCodes = &info.Codes; |
4815 | else |
4816 | rCodes = &info.multipleAlternatives[maIndex].Codes; |
4817 | ConstraintWeight BestWeight = CW_Invalid; |
4818 | |
4819 | // Loop over the options, keeping track of the most general one. |
4820 | for (unsigned i = 0, e = rCodes->size(); i != e; ++i) { |
4821 | ConstraintWeight weight = |
4822 | getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str()); |
4823 | if (weight > BestWeight) |
4824 | BestWeight = weight; |
4825 | } |
4826 | |
4827 | return BestWeight; |
4828 | } |
4829 | |
4830 | /// Examine constraint type and operand type and determine a weight value. |
4831 | /// This object must already have been set up with the operand type |
4832 | /// and the current alternative constraint selected. |
4833 | TargetLowering::ConstraintWeight |
4834 | TargetLowering::getSingleConstraintMatchWeight( |
4835 | AsmOperandInfo &info, const char *constraint) const { |
4836 | ConstraintWeight weight = CW_Invalid; |
4837 | Value *CallOperandVal = info.CallOperandVal; |
4838 | // If we don't have a value, we can't do a match, |
4839 | // but allow it at the lowest weight. |
4840 | if (!CallOperandVal) |
4841 | return CW_Default; |
4842 | // Look at the constraint type. |
4843 | switch (*constraint) { |
4844 | case 'i': // immediate integer. |
4845 | case 'n': // immediate integer with a known value. |
4846 | if (isa<ConstantInt>(CallOperandVal)) |
4847 | weight = CW_Constant; |
4848 | break; |
4849 | case 's': // non-explicit integral immediate.
4850 | if (isa<GlobalValue>(CallOperandVal)) |
4851 | weight = CW_Constant; |
4852 | break; |
4853 | case 'E': // immediate float if host format. |
4854 | case 'F': // immediate float. |
4855 | if (isa<ConstantFP>(CallOperandVal)) |
4856 | weight = CW_Constant; |
4857 | break; |
4858 | case '<': // memory operand with autodecrement. |
4859 | case '>': // memory operand with autoincrement. |
4860 | case 'm': // memory operand. |
4861 | case 'o': // offsettable memory operand |
4862 | case 'V': // non-offsettable memory operand |
4863 | weight = CW_Memory; |
4864 | break; |
4865 | case 'r': // general register. |
4866 | case 'g': // general register, memory operand or immediate integer. |
4867 | // note: Clang converts "g" to "imr". |
4868 | if (CallOperandVal->getType()->isIntegerTy()) |
4869 | weight = CW_Register; |
4870 | break; |
4871 | case 'X': // any operand. |
4872 | default: |
4873 | weight = CW_Default; |
4874 | break; |
4875 | } |
4876 | return weight; |
4877 | } |
4878 | |
4879 | /// If there are multiple different constraints that we could pick for this |
4880 | /// operand (e.g. "imr") try to pick the 'best' one. |
4881 | /// This is somewhat tricky: constraints fall into four classes: |
4882 | /// Other -> immediates and magic values |
4883 | /// Register -> one specific register |
4884 | /// RegisterClass -> a group of regs |
4885 | /// Memory -> memory |
4886 | /// Ideally, we would pick the most specific constraint possible: if we have |
4887 | /// something that fits into a register, we would pick it. The problem here |
4888 | /// is that if we have something that could either be in a register or in |
4889 | /// memory, then use of the register could cause selection of *other*
4890 | /// operands to fail: they might only succeed if we pick memory. Because of |
4891 | /// this the heuristic we use is: |
4892 | /// |
4893 | /// 1) If there is an 'other' constraint, and if the operand is valid for |
4894 | /// that constraint, use it. This makes us take advantage of 'i' |
4895 | /// constraints when available. |
4896 | /// 2) Otherwise, pick the most general constraint present. This prefers |
4897 | /// 'm' over 'r', for example. |
4898 | /// |
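/// Concretely (illustrative): for an "imr" operand, a constant operand
/// satisfies rule 1 and picks 'i'; a non-constant operand falls through to
/// rule 2, where 'm' (C_Memory, generality 3) beats 'r' (C_RegisterClass,
/// generality 2).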
4899 | static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, |
4900 | const TargetLowering &TLI, |
4901 | SDValue Op, SelectionDAG *DAG) { |
4902 | assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
4903 | unsigned BestIdx = 0; |
4904 | TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; |
4905 | int BestGenerality = -1; |
4906 | |
4907 | // Loop over the options, keeping track of the most general one. |
4908 | for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { |
4909 | TargetLowering::ConstraintType CType = |
4910 | TLI.getConstraintType(OpInfo.Codes[i]); |
4911 | |
4912 | // Indirect 'other' or 'immediate' constraints are not allowed. |
4913 | if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || |
4914 | CType == TargetLowering::C_Register || |
4915 | CType == TargetLowering::C_RegisterClass)) |
4916 | continue; |
4917 | |
4918 | // If this is an 'other' or 'immediate' constraint, see if the operand is |
4919 | // valid for it. For example, on X86 we might have an 'rI' constraint. If |
4920 | // the operand is an integer in the range [0..31] we want to use I (saving a |
4921 | // load of a register), otherwise we must use 'r'. |
4922 | if ((CType == TargetLowering::C_Other || |
4923 | CType == TargetLowering::C_Immediate) && Op.getNode()) { |
4924 | assert(OpInfo.Codes[i].size() == 1 &&
4925 | "Unhandled multi-letter 'other' constraint");
4926 | std::vector<SDValue> ResultOps; |
4927 | TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i], |
4928 | ResultOps, *DAG); |
4929 | if (!ResultOps.empty()) { |
4930 | BestType = CType; |
4931 | BestIdx = i; |
4932 | break; |
4933 | } |
4934 | } |
4935 | |
4936 | // Things with matching constraints can only be registers, per gcc |
4937 | // documentation. This mainly affects "g" constraints. |
4938 | if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) |
4939 | continue; |
4940 | |
4941 | // This constraint letter is more general than the previous one, use it. |
4942 | int Generality = getConstraintGenerality(CType); |
4943 | if (Generality > BestGenerality) { |
4944 | BestType = CType; |
4945 | BestIdx = i; |
4946 | BestGenerality = Generality; |
4947 | } |
4948 | } |
4949 | |
4950 | OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; |
4951 | OpInfo.ConstraintType = BestType; |
4952 | } |
4953 | |
4954 | /// Determines the constraint code and constraint type to use for the specific |
4955 | /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. |
4956 | void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, |
4957 | SDValue Op, |
4958 | SelectionDAG *DAG) const { |
4959 | assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4960 | |
4961 | // Single-letter constraints ('r') are very common. |
4962 | if (OpInfo.Codes.size() == 1) { |
4963 | OpInfo.ConstraintCode = OpInfo.Codes[0]; |
4964 | OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); |
4965 | } else { |
4966 | ChooseConstraint(OpInfo, *this, Op, DAG); |
4967 | } |
4968 | |
4969 | // 'X' matches anything. |
4970 | if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { |
4971 | // Labels and constants are handled elsewhere ('X' is the only thing |
4972 | // that matches labels). For Functions, the type here is the type of |
4973 | // the result, which is not what we want to look at; leave them alone. |
4974 | Value *v = OpInfo.CallOperandVal; |
4975 | if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) { |
4976 | OpInfo.CallOperandVal = v; |
4977 | return; |
4978 | } |
4979 | |
4980 | if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress) |
4981 | return; |
4982 | |
4983 | // Otherwise, try to resolve it to something we know about by looking at |
4984 | // the actual operand type. |
4985 | if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { |
4986 | OpInfo.ConstraintCode = Repl; |
4987 | OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); |
4988 | } |
4989 | } |
4990 | } |
4991 | |
4992 | /// Given an exact SDIV by a constant, create a multiplication |
4993 | /// with the multiplicative inverse of the constant. |
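/// Worked example (illustrative, i32): an exact sdiv by 6 becomes an
/// arithmetic shift right by 1 (6 = 3 * 2^1) followed by a multiply by the
/// inverse of 3 modulo 2^32, i.e. 0xAAAAAAAB, since
/// 3 * 0xAAAAAAAB == 2^33 + 1 == 1 (mod 2^32).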
4994 | static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, |
4995 | const SDLoc &dl, SelectionDAG &DAG, |
4996 | SmallVectorImpl<SDNode *> &Created) { |
4997 | SDValue Op0 = N->getOperand(0); |
4998 | SDValue Op1 = N->getOperand(1); |
4999 | EVT VT = N->getValueType(0); |
5000 | EVT SVT = VT.getScalarType(); |
5001 | EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); |
5002 | EVT ShSVT = ShVT.getScalarType(); |
5003 | |
5004 | bool UseSRA = false; |
5005 | SmallVector<SDValue, 16> Shifts, Factors; |
5006 | |
5007 | auto BuildSDIVPattern = [&](ConstantSDNode *C) { |
5008 | if (C->isNullValue()) |
5009 | return false; |
5010 | APInt Divisor = C->getAPIntValue(); |
5011 | unsigned Shift = Divisor.countTrailingZeros(); |
5012 | if (Shift) { |
5013 | Divisor.ashrInPlace(Shift); |
5014 | UseSRA = true; |
5015 | } |
5016 | // Calculate the multiplicative inverse, using Newton's method. |
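// (Why this converges, sketch: for odd Divisor the seed Factor = Divisor
// already satisfies Divisor * Factor == 1 (mod 8), and each step
// Factor *= 2 - Divisor * Factor doubles the number of correct low bits,
// so the loop finishes in O(log2 bitwidth) iterations.)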
5017 | APInt t; |
5018 | APInt Factor = Divisor; |
5019 | while ((t = Divisor * Factor) != 1) |
5020 | Factor *= APInt(Divisor.getBitWidth(), 2) - t; |
5021 | Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); |
5022 | Factors.push_back(DAG.getConstant(Factor, dl, SVT)); |
5023 | return true; |
5024 | }; |
5025 | |
5026 | // Collect all magic values from the build vector. |
5027 | if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern)) |
5028 | return SDValue(); |
5029 | |
5030 | SDValue Shift, Factor; |
5031 | if (VT.isVector()) { |
5032 | Shift = DAG.getBuildVector(ShVT, dl, Shifts); |
5033 | Factor = DAG.getBuildVector(VT, dl, Factors); |
5034 | } else { |
5035 | Shift = Shifts[0]; |
5036 | Factor = Factors[0]; |
5037 | } |
5038 | |
5039 | SDValue Res = Op0; |
5040 | |
5041 | // Shift the value upfront if it is even, so the LSB is one. |
5042 | if (UseSRA) { |
5043 | // TODO: For UDIV use SRL instead of SRA. |
5044 | SDNodeFlags Flags; |
5045 | Flags.setExact(true); |
5046 | Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags); |
5047 | Created.push_back(Res.getNode()); |
5048 | } |
5049 | |
5050 | return DAG.getNode(ISD::MUL, dl, VT, Res, Factor); |
5051 | } |
5052 | |
5053 | SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
5054 | SelectionDAG &DAG, |
5055 | SmallVectorImpl<SDNode *> &Created) const { |
5056 | AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); |
5057 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
5058 | if (TLI.isIntDivCheap(N->getValueType(0), Attr)) |
5059 | return SDValue(N, 0); // Lower SDIV as SDIV |
5060 | return SDValue(); |
5061 | } |
5062 | |
5063 | /// Given an ISD::SDIV node expressing a divide by constant, |
5064 | /// return a DAG expression to select that will generate the same value by |
5065 | /// multiplying by a magic number. |
5066 | /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". |
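/// Worked example (constants per "Hacker's Delight", illustrative): for an
/// i32 sdiv by 7 the magic value is 0x92492493 with shift 2; the divisor is
/// positive and the magic is negative, so the numerator is added after the
/// MULHS, then the quotient is shifted and the masked sign bit added in.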
5067 | SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, |
5068 | bool IsAfterLegalization, |
5069 | SmallVectorImpl<SDNode *> &Created) const { |
5070 | SDLoc dl(N); |
5071 | EVT VT = N->getValueType(0); |
5072 | EVT SVT = VT.getScalarType(); |
5073 | EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); |
5074 | EVT ShSVT = ShVT.getScalarType(); |
5075 | unsigned EltBits = VT.getScalarSizeInBits(); |
5076 | |
5077 | // Check to see if we can do this. |
5078 | // FIXME: We should be more aggressive here. |
5079 | if (!isTypeLegal(VT)) |
5080 | return SDValue(); |
5081 | |
5082 | // If the sdiv has an 'exact' bit we can use a simpler lowering. |
5083 | if (N->getFlags().hasExact()) |
5084 | return BuildExactSDIV(*this, N, dl, DAG, Created); |
5085 | |
5086 | SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks; |
5087 | |
5088 | auto BuildSDIVPattern = [&](ConstantSDNode *C) { |
5089 | if (C->isNullValue()) |
5090 | return false; |
5091 | |
5092 | const APInt &Divisor = C->getAPIntValue(); |
5093 | APInt::ms magics = Divisor.magic(); |
5094 | int NumeratorFactor = 0; |
5095 | int ShiftMask = -1; |
5096 | |
5097 | if (Divisor.isOneValue() || Divisor.isAllOnesValue()) { |
5098 | // If d is +1/-1, we just multiply the numerator by +1/-1. |
5099 | NumeratorFactor = Divisor.getSExtValue(); |
5100 | magics.m = 0; |
5101 | magics.s = 0; |
5102 | ShiftMask = 0; |
5103 | } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { |
5104 | // If d > 0 and m < 0, add the numerator. |
5105 | NumeratorFactor = 1; |
5106 | } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { |
5107 | // If d < 0 and m > 0, subtract the numerator. |
5108 | NumeratorFactor = -1; |
5109 | } |
5110 | |
5111 | MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT)); |
5112 | Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT)); |
5113 | Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT)); |
5114 | ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT)); |
5115 | return true; |
5116 | }; |
5117 | |
5118 | SDValue N0 = N->getOperand(0); |
5119 | SDValue N1 = N->getOperand(1); |
5120 | |
5121 | // Collect the shifts / magic values from each element. |
5122 | if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern)) |
5123 | return SDValue(); |
5124 | |
5125 | SDValue MagicFactor, Factor, Shift, ShiftMask; |
5126 | if (VT.isVector()) { |
5127 | MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); |
5128 | Factor = DAG.getBuildVector(VT, dl, Factors); |
5129 | Shift = DAG.getBuildVector(ShVT, dl, Shifts); |
5130 | ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks); |
5131 | } else { |
5132 | MagicFactor = MagicFactors[0]; |
5133 | Factor = Factors[0]; |
5134 | Shift = Shifts[0]; |
5135 | ShiftMask = ShiftMasks[0]; |
5136 | } |
5137 | |
5138 | // Multiply the numerator (operand 0) by the magic value. |
5139 | // FIXME: We should support doing a MUL in a wider type. |
5140 | SDValue Q; |
5141 | if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) |
5142 | : isOperationLegalOrCustom(ISD::MULHS, VT)) |
5143 | Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor); |
5144 | else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) |
5145 | : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) { |
5146 | SDValue LoHi = |
5147 | DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor); |
5148 | Q = SDValue(LoHi.getNode(), 1); |
5149 | } else |
5150 | return SDValue(); // No mulhs or equivalent. |
5151 | Created.push_back(Q.getNode()); |
5152 | |
5153 | // (Optionally) Add/subtract the numerator using Factor. |
5154 | Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor); |
5155 | Created.push_back(Factor.getNode()); |
5156 | Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor); |
5157 | Created.push_back(Q.getNode()); |
5158 | |
5159 | // Shift right algebraic by shift value. |
5160 | Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift); |
5161 | Created.push_back(Q.getNode()); |
5162 | |
5163 | // Extract the sign bit, mask it and add it to the quotient. |
5164 | SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT); |
5165 | SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift); |
5166 | Created.push_back(T.getNode()); |
5167 | T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask); |
5168 | Created.push_back(T.getNode()); |
5169 | return DAG.getNode(ISD::ADD, dl, VT, Q, T); |
5170 | } |
5171 | |
5172 | /// Given an ISD::UDIV node expressing a divide by constant, |
5173 | /// return a DAG expression to select that will generate the same value by |
5174 | /// multiplying by a magic number. |
5175 | /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". |
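/// Worked example (constants per "Hacker's Delight", illustrative): for an
/// i32 udiv by 7, magicu gives 0x24924925 with the add indicator set, so
/// the NPQ path below computes q = mulhu(n, M) and then
/// (((n - q) >> 1) + q) >> 2.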
5176 | SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, |
5177 | bool IsAfterLegalization, |
5178 | SmallVectorImpl<SDNode *> &Created) const { |
5179 | SDLoc dl(N); |
5180 | EVT VT = N->getValueType(0); |
5181 | EVT SVT = VT.getScalarType(); |
5182 | EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); |
5183 | EVT ShSVT = ShVT.getScalarType(); |
5184 | unsigned EltBits = VT.getScalarSizeInBits(); |
5185 | |
5186 | // Check to see if we can do this. |
5187 | // FIXME: We should be more aggressive here. |
5188 | if (!isTypeLegal(VT)) |
5189 | return SDValue(); |
5190 | |
5191 | bool UseNPQ = false; |
5192 | SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; |
5193 | |
5194 | auto BuildUDIVPattern = [&](ConstantSDNode *C) { |
5195 | if (C->isNullValue()) |
5196 | return false; |
5197 | // FIXME: We should use a narrower constant when the upper |
5198 | // bits are known to be zero. |
5199 | APInt Divisor = C->getAPIntValue(); |
5200 | APInt::mu magics = Divisor.magicu(); |
5201 | unsigned PreShift = 0, PostShift = 0; |
5202 | |
5203 | // If the divisor is even, we can avoid using the expensive fixup by |
5204 | // shifting the divided value upfront. |
5205 | if (magics.a != 0 && !Divisor[0]) { |
5206 | PreShift = Divisor.countTrailingZeros(); |
5207 | // Get magic number for the shifted divisor. |
5208 | magics = Divisor.lshr(PreShift).magicu(PreShift); |
5209 | assert(magics.a == 0 && "Should use cheap fixup now");
5210 | } |
5211 | |
5212 | APInt Magic = magics.m; |
5213 | |
5214 | bool SelNPQ;
5215 | if (magics.a == 0 || Divisor.isOneValue()) { |
5216 | assert(magics.s < Divisor.getBitWidth() &&
5217 | "We shouldn't generate an undefined shift!");
5218 | PostShift = magics.s; |
5219 | SelNPQ = false; |
5220 | } else { |
5221 | PostShift = magics.s - 1; |
5222 | SelNPQ = true; |
5223 | } |
5224 | |
5225 | PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT)); |
5226 | MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT)); |
5227 | NPQFactors.push_back( |
5228 | DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) |
5229 | : APInt::getNullValue(EltBits), |
5230 | dl, SVT)); |
5231 | PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT)); |
5232 | UseNPQ |= SelNPQ; |
5233 | return true; |
5234 | }; |
5235 | |
5236 | SDValue N0 = N->getOperand(0); |
5237 | SDValue N1 = N->getOperand(1); |
5238 | |
5239 | // Collect the shifts/magic values from each element. |
5240 | if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern)) |
5241 | return SDValue(); |
5242 | |
5243 | SDValue PreShift, PostShift, MagicFactor, NPQFactor; |
5244 | if (VT.isVector()) { |
5245 | PreShift = DAG.getBuildVector(ShVT, dl, PreShifts); |
5246 | MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); |
5247 | NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors); |
5248 | PostShift = DAG.getBuildVector(ShVT, dl, PostShifts); |
5249 | } else { |
5250 | PreShift = PreShifts[0]; |
5251 | MagicFactor = MagicFactors[0]; |
5252 | PostShift = PostShifts[0]; |
5253 | } |
5254 | |
5255 | SDValue Q = N0; |
5256 | Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift); |
5257 | Created.push_back(Q.getNode()); |
5258 | |
5259 | // FIXME: We should support doing a MUL in a wider type. |
5260 | auto GetMULHU = [&](SDValue X, SDValue Y) { |
5261 | if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) |
5262 | : isOperationLegalOrCustom(ISD::MULHU, VT)) |
5263 | return DAG.getNode(ISD::MULHU, dl, VT, X, Y); |
5264 | if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) |
5265 | : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) { |
5266 | SDValue LoHi = |
5267 | DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y); |
5268 | return SDValue(LoHi.getNode(), 1); |
5269 | } |
5270 | return SDValue(); // No mulhu or equivalent |
5271 | }; |
5272 | |
5273 | // Multiply the numerator (operand 0) by the magic value. |
5274 | Q = GetMULHU(Q, MagicFactor); |
5275 | if (!Q) |
5276 | return SDValue(); |
5277 | |
5278 | Created.push_back(Q.getNode()); |
5279 | |
5280 | if (UseNPQ) { |
5281 | SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q); |
5282 | Created.push_back(NPQ.getNode()); |
5283 | |
5284 | // For vectors we might have a mix of non-NPQ/NPQ paths, so use |
5285 | // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero. |
5286 | if (VT.isVector()) |
5287 | NPQ = GetMULHU(NPQ, NPQFactor); |
5288 | else |
5289 | NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT)); |
5290 | |
5291 | Created.push_back(NPQ.getNode()); |
5292 | |
5293 | Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); |
5294 | Created.push_back(Q.getNode()); |
5295 | } |
5296 | |
5297 | Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); |
5298 | Created.push_back(Q.getNode()); |
5299 | |
5300 | SDValue One = DAG.getConstant(1, dl, VT); |
5301 | SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ); |
5302 | return DAG.getSelect(dl, VT, IsOne, N0, Q); |
5303 | } |
5304 | |
5305 | /// If all values in Values that *don't* match the predicate are the same 'splat'
5306 | /// value, then replace all values with that splat value. |
5307 | /// Else, if AlternativeReplacement was provided, then replace all values that |
5308 | /// do match predicate with AlternativeReplacement value. |
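/// For example (illustrative): with Values = {7, 0, 7, 0} and a predicate
/// matching zeros, the splat value found is 7 and the result is
/// {7, 7, 7, 7}; if some third value were present, the zeros would instead
/// be replaced with AlternativeReplacement (when provided).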
5309 | static void |
5310 | turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values, |
5311 | std::function<bool(SDValue)> Predicate, |
5312 | SDValue AlternativeReplacement = SDValue()) { |
5313 | SDValue Replacement; |
5314 | // Is there a value for which the Predicate does *NOT* match? What is it? |
5315 | auto SplatValue = llvm::find_if_not(Values, Predicate); |
5316 | if (SplatValue != Values.end()) { |
5317 | // Does Values consist only of SplatValue's and values matching Predicate? |
5318 | if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) { |
5319 | return Value == *SplatValue || Predicate(Value); |
5320 | })) // Then we shall replace values matching predicate with SplatValue. |
5321 | Replacement = *SplatValue; |
5322 | } |
5323 | if (!Replacement) { |
5324 | // Oops, we did not find the "baseline" splat value. |
5325 | if (!AlternativeReplacement) |
5326 | return; // Nothing to do. |
5327 | // Let's replace with provided value then. |
5328 | Replacement = AlternativeReplacement; |
5329 | } |
5330 | std::replace_if(Values.begin(), Values.end(), Predicate, Replacement); |
5331 | } |
5332 | |
5333 | /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE |
5334 | /// where the divisor is constant and the comparison target is zero, |
5335 | /// return a DAG expression that will generate the same comparison result |
5336 | /// using only multiplications, additions and shifts/rotations. |
5337 | /// Ref: "Hacker's Delight" 10-17. |
5338 | SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode, |
5339 | SDValue CompTargetNode, |
5340 | ISD::CondCode Cond, |
5341 | DAGCombinerInfo &DCI, |
5342 | const SDLoc &DL) const { |
5343 | SmallVector<SDNode *, 5> Built; |
5344 | if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, |
5345 | DCI, DL, Built)) { |
5346 | for (SDNode *N : Built) |
5347 | DCI.AddToWorklist(N); |
5348 | return Folded; |
5349 | } |
5350 | |
5351 | return SDValue(); |
5352 | } |
5353 | |
5354 | SDValue |
5355 | TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, |
5356 | SDValue CompTargetNode, ISD::CondCode Cond, |
5357 | DAGCombinerInfo &DCI, const SDLoc &DL, |
5358 | SmallVectorImpl<SDNode *> &Created) const { |
5359 | // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) |
5360 | // - D must be constant, with D = D0 * 2^K where D0 is odd |
5361 | // - P is the multiplicative inverse of D0 modulo 2^W |
5362 | // - Q = floor(((2^W) - 1) / D) |
5363 | // where W is the width of the common type of N and D. |
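// Worked example (illustrative, W = 8): D = 6 = 3 * 2^1 gives K = 1,
// D0 = 3, P = inv(3) mod 2^8 = 0xAB (3 * 0xAB == 513 == 1 mod 256), and
// Q = floor(255 / 6) = 42, so (x u% 6 == 0) becomes
// (rotr (mul x, 0xAB), 1) u<= 42.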
5364 | assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5365 | "Only applicable for (in)equality comparisons.");
5366 | |
5367 | SelectionDAG &DAG = DCI.DAG; |
5368 | |
5369 | EVT VT = REMNode.getValueType(); |
5370 | EVT SVT = VT.getScalarType(); |
5371 | EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); |
5372 | EVT ShSVT = ShVT.getScalarType(); |
5373 | |
5374 | // If MUL is unavailable, we cannot proceed in any case. |
5375 | if (!isOperationLegalOrCustom(ISD::MUL, VT)) |
5376 | return SDValue(); |
5377 | |
5378 | bool ComparingWithAllZeros = true; |
5379 | bool AllComparisonsWithNonZerosAreTautological = true; |
5380 | bool HadTautologicalLanes = false; |
5381 | bool AllLanesAreTautological = true; |
5382 | bool HadEvenDivisor = false; |
5383 | bool AllDivisorsArePowerOfTwo = true; |
5384 | bool HadTautologicalInvertedLanes = false; |
5385 | SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts; |
5386 | |
5387 | auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) { |
5388 | // Division by 0 is UB. Leave it to be constant-folded elsewhere. |
5389 | if (CDiv->isNullValue()) |
5390 | return false; |
5391 | |
5392 | const APInt &D = CDiv->getAPIntValue(); |
5393 | const APInt &Cmp = CCmp->getAPIntValue(); |
5394 | |
5395 | ComparingWithAllZeros &= Cmp.isNullValue(); |
5396 | |
5397 | // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
5398 | // if C2 is not less than C1, the comparison is always false.
5399 | // But we will only be able to produce the comparison that will give the
5400 | // opposite tautological answer. So this lane would need to be fixed up.
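// For instance (illustrative): `x u% 4 == 7` can never hold, since the
// remainder is at most 3, so such a lane is tautologically false.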
5401 | bool TautologicalInvertedLane = D.ule(Cmp); |
5402 | HadTautologicalInvertedLanes |= TautologicalInvertedLane; |
5403 | |
5404 | // If all lanes are tautological (either all divisors are ones, or divisor |
5405 | // is not greater than the constant we are comparing with), |
5406 | // we will prefer to avoid the fold. |
5407 | bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; |
5408 | HadTautologicalLanes |= TautologicalLane; |
5409 | AllLanesAreTautological &= TautologicalLane; |
5410 | |
5411 | // If we are comparing with non-zero, we'll need to subtract said
5412 | // comparison value from the LHS. But there is no point in doing that if
5413 | // every lane where we are comparing with non-zero is tautological.
5414 | if (!Cmp.isNullValue()) |
5415 | AllComparisonsWithNonZerosAreTautological &= TautologicalLane; |
5416 | |
5417 | // Decompose D into D0 * 2^K |
5418 | unsigned K = D.countTrailingZeros(); |
5419 | assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
5420 | APInt D0 = D.lshr(K); |
5421 | |
5422 | // D is even if it has trailing zeros. |
5423 | HadEvenDivisor |= (K != 0); |
5424 | // D is a power-of-two if D0 is one. |
5425 | // If all divisors are power-of-two, we will prefer to avoid the fold. |
5426 | AllDivisorsArePowerOfTwo &= D0.isOneValue(); |
5427 | |
5428 | // P = inv(D0, 2^W) |
5429 | // 2^W requires W + 1 bits, so we have to extend and then truncate. |
5430 | unsigned W = D.getBitWidth(); |
5431 | APInt P = D0.zext(W + 1) |
5432 | .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) |
5433 | .trunc(W); |
5434 | assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5435 | assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5436 | |
5437 | // Q = floor((2^W - 1) u/ D) |
5438 | // R = ((2^W - 1) u% D) |
5439 | APInt Q, R; |
5440 | APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R); |
5441 | |
5442 | // If we are comparing with zero, then that comparison constant is okay, |
5443 | // else it may need to be one less than that. |
5444 | if (Cmp.ugt(R)) |
5445 | Q -= 1; |
5446 | |
5447 | assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5448 | "We are expecting that K is always less than all-ones for ShSVT");
5449 | |
5450 | // If the lane is tautological the result can be constant-folded. |
5451 | if (TautologicalLane) { |
5452 | // Set P and K to bogus values so we can try to splat them. |
5453 | P = 0; |
5454 | K = -1; |
5455 | // And ensure that the comparison constant is tautological, i.e. that |
5456 | // it will always compare true/false. |
5457 | Q = -1; |
5458 | } |
5459 | |
5460 | PAmts.push_back(DAG.getConstant(P, DL, SVT)); |
5461 | KAmts.push_back( |
5462 | DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); |
5463 | QAmts.push_back(DAG.getConstant(Q, DL, SVT)); |
5464 | return true; |
5465 | }; |
5466 | |
5467 | SDValue N = REMNode.getOperand(0); |
5468 | SDValue D = REMNode.getOperand(1); |
5469 | |
5470 | // Collect the values from each element. |
5471 | if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern)) |
5472 | return SDValue(); |
5473 | |
5474 | // If all lanes are tautological, the result can be constant-folded. |
5475 | if (AllLanesAreTautological) |
5476 | return SDValue(); |
5477 | |
5478 | // If this is a urem by a power-of-two, avoid the fold since it can be |
5479 | // best implemented as a bit test. |
5480 | if (AllDivisorsArePowerOfTwo) |
5481 | return SDValue(); |
5482 | |
5483 | SDValue PVal, KVal, QVal; |
5484 | if (VT.isVector()) { |
5485 | if (HadTautologicalLanes) { |
5486 | // Try to turn PAmts into a splat, since we don't care about the values |
5487 | // that are currently '0'. If we can't, just keep the '0's. |
5488 | turnVectorIntoSplatVector(PAmts, isNullConstant); |
5489 | // Try to turn KAmts into a splat, since we don't care about the values |
5490 | // that are currently '-1'. If we can't, change them to '0's. |
5491 | turnVectorIntoSplatVector(KAmts, isAllOnesConstant, |
5492 | DAG.getConstant(0, DL, ShSVT)); |
5493 | } |
5494 | |
5495 | PVal = DAG.getBuildVector(VT, DL, PAmts); |
5496 | KVal = DAG.getBuildVector(ShVT, DL, KAmts); |
5497 | QVal = DAG.getBuildVector(VT, DL, QAmts); |
5498 | } else { |
5499 | PVal = PAmts[0]; |
5500 | KVal = KAmts[0]; |
5501 | QVal = QAmts[0]; |
5502 | } |
5503 | |
5504 | if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) { |
5505 | if (!isOperationLegalOrCustom(ISD::SUB, VT)) |
5506 | return SDValue(); // FIXME: Could/should use `ISD::ADD`? |
5507 | assert(CompTargetNode.getValueType() == N.getValueType() && |
5508 | "Expecting that the types on LHS and RHS of comparisons match."); |
5509 | N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode); |
5510 | } |
5511 | |
5512 | // (mul N, P) |
5513 | SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); |
5514 | Created.push_back(Op0.getNode()); |
5515 | |
5516 | // Rotate right only if any divisor was even. We avoid rotates for all-odd |
5517 | // divisors as a performance improvement, since rotating by 0 is a no-op. |
5518 | if (HadEvenDivisor) { |
5519 | // We need ROTR to do this. |
5520 | if (!isOperationLegalOrCustom(ISD::ROTR, VT)) |
5521 | return SDValue(); |
5522 | SDNodeFlags Flags; |
5523 | Flags.setExact(true); |
5524 | // UREM: (rotr (mul N, P), K) |
5525 | Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); |
5526 | Created.push_back(Op0.getNode()); |
5527 | } |
5528 | |
5529 | // UREM: (setule/setugt (rotr (mul N, P), K), Q) |
5530 | SDValue NewCC = |
5531 | DAG.getSetCC(DL, SETCCVT, Op0, QVal, |
5532 | ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); |
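// (End-to-end sketch with illustrative numbers, W = 8, D = 6, giving
// P = 171, K = 1, Q = 42: for N = 12, (12 * 171) mod 256 = 4 and
// rotr(4, 1) = 2 u<= 42, i.e. true, matching 12 u% 6 == 0; for N = 13,
// (13 * 171) mod 256 = 175 and rotr(175, 1) = 215 u> 42, i.e. false,
// matching 13 u% 6 != 0.)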
5533 | if (!HadTautologicalInvertedLanes) |
5534 | return NewCC; |
5535 | |
5536 | // If any lanes previously compared always-false, the NewCC will give an |
5537 | // always-true result for them, so we need to fix up those lanes; or the |
5538 | // other way around for the inequality predicate. |
5539 | assert(VT.isVector() && "Can/should only get here for vectors."); |
5540 | Created.push_back(NewCC.getNode()); |
5541 | |
5542 | // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`, |
5543 | // if C2 is not less than C1, the comparison is always false. |
5544 | // But we have produced the comparison that will give the |
5545 | // opposite tautological answer. So these lanes would need to be fixed up. |
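// (Concrete case for exposition: `x u% 3 == 4` is always false because
// x u% 3 u< 3, yet the rewritten check above would report true for it.)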
5546 | SDValue TautologicalInvertedChannels = |
5547 | DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE); |
5548 | Created.push_back(TautologicalInvertedChannels.getNode()); |
5549 | |
5550 | if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) { |
5551 | // If we have a vector select, let's replace the comparison results in the |
5552 | // affected lanes with the correct tautological result. |
5553 | SDValue Replacement = |
5554 | DAG.getBoolConstant(Cond != ISD::SETEQ, DL, SETCCVT, SETCCVT); |
5555 | return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels, |
5556 | Replacement, NewCC); |
5557 | } |
5558 | |
5559 | // Else, we can just invert the comparison result in the appropriate lanes. |
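// (Note, added for exposition: the mask is true exactly in the affected
// lanes, and XOR-ing a boolean with true inverts it whether true is
// represented as 1 or as all-ones, so only those lanes flip.)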
5560 | if (isOperationLegalOrCustom(ISD::XOR, SETCCVT)) |
5561 | return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC, |
5562 | TautologicalInvertedChannels); |
5563 | |
5564 | return SDValue(); // Don't know how to lower. |
5565 | } |
5566 | |
5567 | /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE |
5568 | /// where the divisor is constant and the comparison target is zero, |
5569 | /// return a DAG expression that will generate the same comparison result |
5570 | /// using only multiplications, additions and shifts/rotations. |
5571 | /// Ref: "Hacker's Delight" 10-17. |
5572 | SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode, |
5573 | SDValue CompTargetNode, |
5574 | ISD::CondCode Cond, |
5575 | DAGCombinerInfo &DCI, |
5576 | const SDLoc &DL) const { |
5577 | SmallVector<SDNode *, 7> Built; |
5578 | if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, |
5579 | DCI, DL, Built)) { |
5580 | assert(Built.size() <= 7 && "Max size prediction failed."); |
5581 | for (SDNode *N : Built) |
5582 | DCI.AddToWorklist(N); |
5583 | return Folded; |
5584 | } |
5585 | |
5586 | return SDValue(); |
5587 | } |
5588 | |
5589 | SDValue |
5590 | TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, |
5591 | SDValue CompTargetNode, ISD::CondCode Cond, |
5592 | DAGCombinerInfo &DCI, const SDLoc &DL, |
5593 | SmallVectorImpl<SDNode *> &Created) const { |
5594 | // Fold: |
5595 | // (seteq/ne (srem N, D), 0) |
5596 | // To: |
5597 | // (setule/ugt (rotr (add (mul N, P), A), K), Q) |
5598 | // |
5599 | // - D must be constant, with D = D0 * 2^K where D0 is odd |
5600 | // - P is the multiplicative inverse of D0 modulo 2^W |
5601 | // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K))) |
5602 | // - Q = floor((2 * A) / (2^K)) |
5603 | // where W is the width of the common type of N and D. |
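// (Worked constants, illustrative only, for W = 8 and D = 6: D0 = 3,
// K = 1, P = inv(3, 2^8) = 171, A = floor(127 / 3) & -2 = 42, and
// Q = floor((2 * 42) / 2^1) = 42. Then, e.g., N = -12 = 244 (mod 256)
// gives rotr(244 * 171 + 42, 1) = rotr(38, 1) = 19 u<= 42, i.e. true,
// matching -12 s% 6 == 0.)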
5604 | assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && |
5605 | "Only applicable for (in)equality comparisons."); |
5606 | |
5607 | SelectionDAG &DAG = DCI.DAG; |
5608 | |
5609 | EVT VT = REMNode.getValueType(); |
5610 | EVT SVT = VT.getScalarType(); |
5611 | EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); |
5612 | EVT ShSVT = ShVT.getScalarType(); |
5613 | |
5614 | // If MUL is unavailable, we cannot proceed in any case. |
5615 | if (!isOperationLegalOrCustom(ISD::MUL, VT)) |
5616 | return SDValue(); |
5617 | |
5618 | // TODO: Could support comparing with non-zero too. |
5619 | ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); |
5620 | if (!CompTarget || !CompTarget->isNullValue()) |
5621 | return SDValue(); |
5622 | |
5623 | bool HadIntMinDivisor = false; |
5624 | bool HadOneDivisor = false; |
5625 | bool AllDivisorsAreOnes = true; |
5626 | bool HadEvenDivisor = false; |
5627 | bool NeedToApplyOffset = false; |
5628 | bool AllDivisorsArePowerOfTwo = true; |
5629 | SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts; |
5630 | |
5631 | auto BuildSREMPattern = [&](ConstantSDNode *C) { |
5632 | // Division by 0 is UB. Leave it to be constant-folded elsewhere. |
5633 | if (C->isNullValue()) |
5634 | return false; |
5635 | |
5636 | // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. |
5637 | |
5638 | // WARNING: this fold is only valid for positive divisors! |
5639 | APInt D = C->getAPIntValue(); |
5640 | if (D.isNegative()) |
5641 | D.negate(); // `rem %X, -C` is equivalent to `rem %X, C` |
5642 | |
5643 | HadIntMinDivisor |= D.isMinSignedValue(); |
5644 | |
5645 | // If all divisors are ones, we will prefer to avoid the fold. |
5646 | HadOneDivisor |= D.isOneValue(); |
5647 | AllDivisorsAreOnes &= D.isOneValue(); |
5648 | |
5649 | // Decompose D into D0 * 2^K |
5650 | unsigned K = D.countTrailingZeros(); |
5651 | assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); |
5652 | APInt D0 = D.lshr(K); |
5653 | |
5654 | if (!D.isMinSignedValue()) { |
5655 | // D is even if it has trailing zeros; unless it's INT_MIN, in which case |
5656 | // we don't care about this lane in this fold; we'll handle it specially. |
5657 | HadEvenDivisor |= (K != 0); |
5658 | } |
5659 | |
5660 | // D is a power-of-two if D0 is one. This includes INT_MIN. |
5661 | // If all divisors are power-of-two, we will prefer to avoid the fold. |
5662 | AllDivisorsArePowerOfTwo &= D0.isOneValue(); |
5663 | |
5664 | // P = inv(D0, 2^W) |
5665 | // 2^W requires W + 1 bits, so we have to extend and then truncate. |
5666 | unsigned W = D.getBitWidth(); |
5667 | APInt P = D0.zext(W + 1) |
5668 | .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) |
5669 | .trunc(W); |
5670 | assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable |
5671 | assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); |
5672 | |
5673 | // A = floor((2^(W - 1) - 1) / D0) & -2^K |
5674 | APInt A = APInt::getSignedMaxValue(W).udiv(D0); |
5675 | A.clearLowBits(K); |
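// (Illustrative, W = 8, D = 12: D0 = 3, K = 2, so
// A = floor(127 / 3) & -4 = 42 & 0b11111100 = 40.)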
5676 | |
5677 | if (!D.isMinSignedValue()) { |
5678 | // If the divisor is INT_MIN, then we don't care about this lane in this |
5679 | // fold; we'll handle it specially. |
5680 | NeedToApplyOffset |= A != 0; |
5681 | } |
5682 | |
5683 | // Q = floor((2 * A) / (2^K)) |
5684 | APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); |
5685 | |
5686 | assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && |
5687 | "We are expecting that A is always less than all-ones for SVT"); |
5688 | assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && |
5689 | "We are expecting that K is always less than all-ones for ShSVT"); |
5690 | |
5691 | // If the divisor is 1, the result can be constant-folded. Likewise, we |
5692 | // don't care about INT_MIN lanes; those can be set to undef if appropriate. |
5693 | if (D.isOneValue()) { |
5694 | // Set P, A and K to bogus values so we can try to splat them. |
5695 | P = 0; |
5696 | A = -1; |
5697 | K = -1; |
5698 | |
5699 | // x ?% 1 == 0 <--> true <--> x u<= -1 |
5700 | Q = -1; |
5701 | } |
5702 | |
5703 | PAmts.push_back(DAG.getConstant(P, DL, SVT)); |
5704 | AAmts.push_back(DAG.getConstant(A, DL, SVT)); |
5705 | KAmts.push_back( |
5706 | DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); |
5707 | QAmts.push_back(DAG.getConstant(Q, DL, SVT)); |
5708 | return true; |
5709 | }; |
5710 | |
5711 | SDValue N = REMNode.getOperand(0); |
5712 | SDValue D = REMNode.getOperand(1); |
5713 | |
5714 | // Collect the values from each element. |
5715 | if (!ISD::matchUnaryPredicate(D, BuildSREMPattern)) |
5716 | return SDValue(); |
5717 | |
5718 | // If this is a srem by one, avoid the fold since it can be constant-folded. |
5719 | if (AllDivisorsAreOnes) |
5720 | return SDValue(); |
5721 | |
5722 | // If this is a srem by a power-of-two (including INT_MIN), avoid the fold |
5723 | // since it can be best implemented as a bit test. |
5724 | if (AllDivisorsArePowerOfTwo) |
5725 | return SDValue(); |
5726 | |
5727 | SDValue PVal, AVal, KVal, QVal; |
5728 | if (VT.isVector()) { |
5729 | if (HadOneDivisor) { |
5730 | // Try to turn PAmts into a splat, since we don't care about the values |
5731 | // that are currently '0'. If we can't, just keep the '0's. |
5732 | turnVectorIntoSplatVector(PAmts, isNullConstant); |
5733 | // Try to turn AAmts into a splat, since we don't care about the |
5734 | // values that are currently '-1'. If we can't, change them to '0's. |
5735 | turnVectorIntoSplatVector(AAmts, isAllOnesConstant, |
5736 | DAG.getConstant(0, DL, SVT)); |
5737 | // Try to turn KAmts into a splat, since we don't care about the values |
5738 | // that are currently '-1'. If we can't, change them to '0's. |
5739 | turnVectorIntoSplatVector(KAmts, isAllOnesConstant, |
5740 | DAG.getConstant(0, DL, ShSVT)); |
5741 | } |
5742 | |
5743 | PVal = DAG.getBuildVector(VT, DL, PAmts); |
5744 | AVal = DAG.getBuildVector(VT, DL, AAmts); |
5745 | KVal = DAG.getBuildVector(ShVT, DL, KAmts); |
5746 | QVal = DAG.getBuildVector(VT, DL, QAmts); |
5747 | } else { |
5748 | PVal = PAmts[0]; |
5749 | AVal = AAmts[0]; |
5750 | KVal = KAmts[0]; |
5751 | QVal = QAmts[0]; |
5752 | } |
5753 | |
5754 | // (mul N, P) |
5755 | SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); |
5756 | Created.push_back(Op0.getNode()); |
5757 | |
5758 | if (NeedToApplyOffset) { |
5759 | // We need ADD to do this. |
5760 | if (!isOperationLegalOrCustom(ISD::ADD, VT)) |
5761 | return SDValue(); |
5762 | |
5763 | // (add (mul N, P), A) |
5764 | Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal); |
5765 | Created.push_back(Op0.getNode()); |
5766 | } |
5767 | |
5768 | // Rotate right only if any divisor was even. We avoid rotates for all-odd |
5769 | // divisors as a performance improvement, since rotating by 0 is a no-op. |
5770 | if (HadEvenDivisor) { |
5771 | // We need ROTR to do this. |
5772 | if (!isOperationLegalOrCustom(ISD::ROTR, VT)) |
5773 | return SDValue(); |
5774 | SDNodeFlags Flags; |
5775 | Flags.setExact(true); |
5776 | // SREM: (rotr (add (mul N, P), A), K) |
5777 | Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); |
5778 | Created.push_back(Op0.getNode()); |
5779 | } |
5780 | |
5781 | // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q) |
5782 | SDValue Fold = |
5783 | DAG.getSetCC(DL, SETCCVT, Op0, QVal, |
5784 | ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); |
5785 | |
5786 | // If we didn't have any lanes with an INT_MIN divisor, then we're done. |
5787 | if (!HadIntMinDivisor) |
5788 | return Fold; |
5789 | |
5790 | // That fold is only valid for positive divisors, which effectively means |
5791 | // it is invalid for INT_MIN divisors. So if we have such a lane, |
5792 | // we must fix up the results for those lanes. |
5793 | assert(VT.isVector() && "Can/should only get here for vectors."); |
5794 | |