//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//

#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <numeric>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "isel"

/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

static cl::opt<unsigned, true>
    LimitFPPrecision("limit-float-precision",
                     cl::desc("Generate low-precision inline sequences "
                              "for some float libcalls"),
                     cl::location(LimitFloatPrecision), cl::Hidden,
                     cl::init(0));

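// Peel the most probable case of a switch ahead of the rest of the lowering
// when its probability meets the threshold below; e.g. at the default of 66,
// a case taken at least 66% of the time is compared against first.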
static cl::opt<unsigned> SwitchPeelThreshold(
    "switch-peel-threshold", cl::Hidden, cl::init(66),
    cl::desc("Set the case probability threshold for peeling the case from a "
             "switch statement. A value greater than 100 will void this "
             "optimization"));

// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;

// Return the calling convention if the Value passed requires ABI mangling as
// it is a parameter to a function or a return value from a function which is
// not an intrinsic.
static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) {
  if (auto *R = dyn_cast<ReturnInst>(V))
    return R->getParent()->getParent()->getCallingConv();

  if (auto *CI = dyn_cast<CallInst>(V)) {
    const bool IsInlineAsm = CI->isInlineAsm();
    const bool IsIndirectFunctionCall =
        !IsInlineAsm && !CI->getCalledFunction();

    // It is possible that the call instruction is an inline asm statement or
    // an indirect function call in which case the return value of
    // getCalledFunction() would be nullptr.
    const bool IsIntrinsicCall =
        !IsInlineAsm && !IsIndirectFunctionCall &&
        CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic;

    if (!IsInlineAsm && !IsIntrinsicCall)
      return CI->getCallingConv();
  }

  return None;
}

static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      Optional<CallingConv::ID> CC);

/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                const SDValue *Parts, unsigned NumParts,
                                MVT PartVT, EVT ValueVT, const Value *V,
                                Optional<CallingConv::ID> CC = None,
                                Optional<ISD::NodeType> AssertOp = None) {
  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
                                  CC);

  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part.
      unsigned RoundParts = NumParts & (NumParts - 1) ?
        1 << Log2_32(NumParts) : NumParts;
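      // When NumParts is not a power of 2 (NumParts & (NumParts - 1) is
      // nonzero), this rounds down to the largest power of 2 not exceeding
      // NumParts, e.g. NumParts == 3 gives RoundParts == 2; the leftover
      // "odd" parts are folded in further below.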
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      if (RoundParts > 2) {
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                              PartVT, HalfVT, V);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                              RoundParts / 2, PartVT, HalfVT, V);
      } else {
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
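        // e.g. an i96 arriving in three i32 parts: the first two parts were
        // paired into an i64 above; the remaining i32 is assembled here, then
        // any-extended to i96, shifted past the low 64 bits, and OR'd in.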
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
                              OddVT, V, CC);

        // Combine the round and odd parts.
        Lo = Val;
        if (DAG.getDataLayout().isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi =
            DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                        DAG.getConstant(Lo.getValueSizeInBits(), DL,
                                        TLI.getPointerTy(DAG.getDataLayout())));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128)
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp)
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
    }
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  // PartEVT is the type of the register class that holds the value.
  // ValueVT is the type of the inline asm operation.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
      ValueVT.bitsLT(PartEVT)) {
    // For an FP value in an integer part, we need to truncate to the right
    // width first.
    PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
  }

  // Handle types that have the same size.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  // Handle types with different sizes.
  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp.hasValue())
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here.
    if (ValueVT.bitsLT(Val.getValueType()))
      return DAG.getNode(
          ISD::FP_ROUND, DL, ValueVT, Val,
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  llvm_unreachable("Unknown mismatch!");
}

static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
                                              const Twine &ErrMsg) {
  const Instruction *I = dyn_cast_or_null<Instruction>(V);
  if (!I)
    return Ctx.emitError(ErrMsg);

  const char *AsmError = ", possible invalid constraint for vector type";
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (isa<InlineAsm>(CI->getCalledValue()))
      return Ctx.emitError(I, ErrMsg + AsmError);

  return Ctx.emitError(I, ErrMsg);
}

/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      Optional<CallingConv::ID> CallConv) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  const bool IsABIRegCopy = CallConv.hasValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  // Handle a multi-element vector.
  if (NumParts > 1) {
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    unsigned NumRegs;

    if (IsABIRegCopy) {
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
          *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
          NumIntermediates, RegisterVT);
    } else {
      NumRegs =
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                     NumIntermediates, RegisterVT);
    }

    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    assert(RegisterVT.getSizeInBits() ==
           Parts[0].getSimpleValueType().getSizeInBits() &&
           "Part type sizes don't match!");

    // Assemble the parts into intermediate operands.
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    if (NumIntermediates == NumParts) {
      // If the register was not expanded, truncate or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
                                  PartVT, IntermediateVT, V);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, build the intermediate
      // operands from the parts.
      assert(NumParts % NumIntermediates == 0 &&
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                  PartVT, IntermediateVT, V);
    }

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
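    // e.g. a v8i32 that arrived as four v2i32 parts is rebuilt here as a
    // CONCAT_VECTORS of the four v2i32 intermediates; scalar intermediates
    // are combined with BUILD_VECTOR instead.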
    EVT BuiltVectorTy =
        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
                         (IntermediateVT.isVector()
                              ? IntermediateVT.getVectorNumElements() * NumParts
                              : NumIntermediates));
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
                      DL, BuiltVectorTy, Ops);
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isVector()) {
    // If the element type of the source/dest vectors are the same, but the
    // parts vector has more elements than the value vector, then we have a
    // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
    // elements we want.
    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
             "Cannot narrow, it would be a lossy transformation");
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    // Vector/Vector bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
           "Cannot handle this kind of promotion");
    // Promoted vector extract
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
  }

  // Trivial bitcast if the types are the same size and the destination
  // vector type is legal.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  if (ValueVT.getVectorNumElements() != 1) {
    // Certain ABIs require that vectors are passed as integers. For vectors
    // that are the same size, this is an obvious bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
      // Bitcast Val back to the original type and extract the corresponding
      // vector we want.
      unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
      EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
                                          ValueVT.getVectorElementType(), Elts);
      Val = DAG.getBitcast(WiderVecType, Val);
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    diagnosePossiblyInvalidConstraint(
        *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
    return DAG.getUNDEF(ValueVT);
  }

  // Handle cases such as i8 -> <1 x i1>
  EVT ValueSVT = ValueVT.getVectorElementType();
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
    Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
                                    : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);

  return DAG.getBuildVector(ValueVT, DL, Val);
}

static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 Optional<CallingConv::ID> CallConv);

/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
                           const Value *V,
                           Optional<CallingConv::ID> CallConv = None,
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
                                CallConv);

  unsigned PartBits = PartVT.getSizeInBits();
  unsigned OrigNumParts = NumParts;
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
         "Copying to an illegal type!");

  if (NumParts == 0)
    return;

  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
  EVT PartEVT = PartVT;
  if (PartEVT == ValueVT) {
    assert(NumParts == 1 && "No-op copy with multiple parts!");
    Parts[0] = Val;
    return;
  }

  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    // If the parts cover more bits than the value has, promote the value.
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
      assert(NumParts == 1 && "Do not know what to promote to!");
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    } else {
      if (ValueVT.isFloatingPoint()) {
        // FP values need to be bitcast, then extended if they are being put
        // into a larger container.
        ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      }
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
             ValueVT.isInteger() &&
             "Unknown mismatch!");
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
      if (PartVT == MVT::x86mmx)
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }
  } else if (PartBits == ValueVT.getSizeInBits()) {
    // Different types of the same size.
    assert(NumParts == 1 && PartEVT != ValueVT);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover fewer bits than the value has, truncate the value.
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
           ValueVT.isInteger() &&
           "Unknown mismatch!");
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    if (PartVT == MVT::x86mmx)
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  }

  // The value may have changed - recompute ValueVT.
  ValueVT = Val.getValueType();
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
         "Failed to tile the value with PartVT!");

  if (NumParts == 1) {
    if (PartEVT != ValueVT) {
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                        "scalar-to-vector conversion failed");
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }

    Parts[0] = Val;
    return;
  }

  // Expand the value into multiple parts.
  if (NumParts & (NumParts - 1)) {
    // The number of parts is not a power of 2. Split off and copy the tail.
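    // e.g. an i96 copied into three i32 parts: RoundParts == 2, so the high
    // i32 tail is taken from Val >> 64 first, then Val is truncated to i64
    // and bisected into the two remaining i32 parts below.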
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
           "Do not know what to expand to!");
    unsigned RoundParts = 1 << Log2_32(NumParts);
    unsigned RoundBits = RoundParts * PartBits;
    unsigned OddParts = NumParts - RoundParts;
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
                                 DAG.getIntPtrConstant(RoundBits, DL));
    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
                   CallConv);

    if (DAG.getDataLayout().isBigEndian())
      // The odd parts were reversed by getCopyToParts - unreverse them.
      std::reverse(Parts + RoundParts, Parts + NumParts);

    NumParts = RoundParts;
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  // The number of parts is a power of 2. Repeatedly bisect the value using
  // EXTRACT_ELEMENT.
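  // e.g. splitting an i64 into four i16 parts: the first pass extracts two
  // i32 halves, the second pass splits each i32 into two i16 parts, halving
  // StepSize on every iteration.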
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           ValueVT.getSizeInBits()),
                         Val);

  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    for (unsigned i = 0; i < NumParts; i += StepSize) {
      unsigned ThisBits = StepSize * PartBits / 2;
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
      SDValue &Part0 = Parts[i];
      SDValue &Part1 = Parts[i+StepSize/2];

      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));

      if (ThisBits == PartBits && ThisVT != PartVT) {
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
      }
    }
  }

  if (DAG.getDataLayout().isBigEndian())
    std::reverse(Parts, Parts + OrigNumParts);
}

static SDValue widenVectorToPartType(SelectionDAG &DAG,
                                     SDValue Val, const SDLoc &DL, EVT PartVT) {
  if (!PartVT.isVector())
    return SDValue();

  EVT ValueVT = Val.getValueType();
  unsigned PartNumElts = PartVT.getVectorNumElements();
  unsigned ValueNumElts = ValueVT.getVectorNumElements();
  if (PartNumElts > ValueNumElts &&
      PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
    EVT ElementVT = PartVT.getVectorElementType();
    // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
    // undef elements.
    SmallVector<SDValue, 16> Ops;
    DAG.ExtractVectorElements(Val, Ops);
    SDValue EltUndef = DAG.getUNDEF(ElementVT);
    for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i)
      Ops.push_back(EltUndef);

    // FIXME: Use CONCAT for 2x -> 4x.
    return DAG.getBuildVector(PartVT, DL, Ops);
  }

  return SDValue();
}

/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 Optional<CallingConv::ID> CallConv) {
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const bool IsABIRegCopy = CallConv.hasValue();

  if (NumParts == 1) {
    EVT PartEVT = PartVT;
    if (PartEVT == ValueVT) {
      // Nothing to do.
    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
      // Bitconvert vector->vector case.
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
      Val = Widened;
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType().bitsGE(
                   ValueVT.getVectorElementType()) &&
               PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {

      // Promoted vector extract
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
    } else {
      if (ValueVT.getVectorNumElements() == 1) {
        Val = DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else {
        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
               "lossy conversion of vector to scalar type");
        EVT IntermediateType =
            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getBitcast(IntermediateType, Val);
        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
    }

    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
    Parts[0] = Val;
    return;
  }

  // Handle a multi-element vector.
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs;
  if (IsABIRegCopy) {
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
        *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
        NumIntermediates, RegisterVT);
  } else {
    NumRegs =
        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                   NumIntermediates, RegisterVT);
  }

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

  unsigned IntermediateNumElts = IntermediateVT.isVector() ?
      IntermediateVT.getVectorNumElements() : 1;

  // Convert the vector to the appropriate type if necessary.
  unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;

  EVT BuiltVectorTy = EVT::getVectorVT(
      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
  MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
  if (ValueVT != BuiltVectorTy) {
    if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
      Val = Widened;

    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
  }

  // Split the vector into intermediate operands.
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
    if (IntermediateVT.isVector()) {
      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
                           DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
    } else {
      Ops[i] = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
          DAG.getConstant(i, DL, IdxVT));
    }
  }

  // Split the intermediate operands into legal parts.
  if (NumParts == NumIntermediates) {
    // If the register was not expanded, promote or copy the value,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, split each value into
    // legal parts.
    assert(NumIntermediates != 0 && "division by zero");
    assert(NumParts % NumIntermediates == 0 &&
           "Must expand into a divisible number of parts!");
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
                     CallConv);
  }
}

RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
                           EVT valuevt, Optional<CallingConv::ID> CC)
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
      RegCount(1, regs.size()), CallConv(CC) {}

RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL, unsigned Reg, Type *Ty,
                           Optional<CallingConv::ID> CC) {
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);

  CallConv = CC;

  for (EVT ValueVT : ValueVTs) {
    unsigned NumRegs =
        isABIMangled()
            ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
            : TLI.getNumRegisters(Context, ValueVT);
    MVT RegisterVT =
        isABIMangled()
            ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
            : TLI.getRegisterType(Context, ValueVT);
    for (unsigned i = 0; i != NumRegs; ++i)
      Regs.push_back(Reg + i);
    RegVTs.push_back(RegisterVT);
    RegCount.push_back(NumRegs);
    Reg += NumRegs;
  }
}

SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                      FunctionLoweringInfo &FuncInfo,
                                      const SDLoc &dl, SDValue &Chain,
                                      SDValue *Flag, const Value *V) const {
  // A Value with type {} or [0 x %t] needs no registers.
  if (ValueVTs.empty())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Assemble the legal parts into the final values.
  SmallVector<SDValue, 4> Values(ValueVTs.size());
  SmallVector<SDValue, 8> Parts;
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
    unsigned NumRegs = RegCount[Value];
    MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
                                          *DAG.getContext(),
                                          CallConv.getValue(), RegVTs[Value])
                                    : RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
      SDValue P;
      if (!Flag) {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
      } else {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
        *Flag = P.getValue(2);
      }

      Chain = P.getValue(1);
      Parts[i] = P;

      // If the source register was virtual and if we know something about it,
      // add an assert node.
      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
          !RegisterVT.isInteger() || RegisterVT.isVector())
        continue;

      const FunctionLoweringInfo::LiveOutInfo *LOI =
          FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
      if (!LOI)
        continue;

      unsigned RegSize = RegisterVT.getSizeInBits();
      unsigned NumSignBits = LOI->NumSignBits;
      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();

      if (NumZeroBits == RegSize) {
        // The current value is a zero.
        // Explicitly express that as it would be easier for
        // optimizations to kick in.
        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
        continue;
      }

      // FIXME: We capture more information than the dag can represent. For
      // now, just use the tightest assertzext/assertsext possible.
      bool isSExt;
      EVT FromVT(MVT::Other);
      if (NumZeroBits) {
        FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
        isSExt = false;
      } else if (NumSignBits > 1) {
        FromVT =
            EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
        isSExt = true;
      } else {
        continue;
      }
      // Add an assertion node.
      assert(FromVT != MVT::Other);
      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
                             RegisterVT, P, DAG.getValueType(FromVT));
    }

    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
                                     RegisterVT, ValueVT, V, CallConv);
    Part += NumRegs;
    Parts.clear();
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}

void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
                                 const Value *V,
                                 ISD::NodeType PreferredExtendType) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ISD::NodeType ExtendKind = PreferredExtendType;

  // Get the list of the value's legal parts.
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumParts = RegCount[Value];

    MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
                                          *DAG.getContext(),
                                          CallConv.getValue(), RegVTs[Value])
                                    : RegVTs[Value];

    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
      ExtendKind = ISD::ZERO_EXTEND;

    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
                   NumParts, RegisterVT, V, CallConv, ExtendKind);
    Part += NumParts;
  }

  // Copy the parts into the registers.
  SmallVector<SDValue, 8> Chains(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    SDValue Part;
    if (!Flag) {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    } else {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
      *Flag = Part.getValue(1);
    }

    Chains[i] = Part.getValue(0);
  }

  if (NumRegs == 1 || Flag)
    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
    // flagged to it. That is the CopyToReg nodes and the user are considered
    // a single scheduling unit. If we create a TokenFactor and return it as
    // chain, then the TokenFactor is both a predecessor (operand) of the
    // user as well as a successor (the TF operands are flagged to the user).
    // c1, f1 = CopyToReg
    // c2, f2 = CopyToReg
    // c3     = TokenFactor c1, c2
    // ...
    //        = op c3, ..., f2
    Chain = Chains[NumRegs-1];
  else
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}

void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
                                        unsigned MatchingIdx, const SDLoc &dl,
                                        SelectionDAG &DAG,
                                        std::vector<SDValue> &Ops) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
  if (HasMatching)
    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
  else if (!Regs.empty() &&
           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
    // Put the register class of the virtual registers in the flag word. That
    // way, later passes can recompute register class constraints for inline
    // assembly as well as normal instructions.
    // Don't do this for tied operands that can use the regclass information
    // from the def.
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
  }

  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
  Ops.push_back(Res);

  if (Code == InlineAsm::Kind_Clobber) {
    // Clobbers should always have a 1:1 mapping with registers, and may
    // reference registers that have illegal (e.g. vector) types. Hence, we
    // shouldn't try to apply any sort of splitting logic to them.
    assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
           "No 1:1 mapping from clobbers to regs?");
    unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
    (void)SP;
    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
      Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
      assert(
          (Regs[I] != SP ||
           DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
          "If we clobbered the stack pointer, MFI should know about it.");
    }
    return;
  }

  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
    MVT RegisterVT = RegVTs[Value];
    for (unsigned i = 0; i != NumRegs; ++i) {
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
      unsigned TheReg = Regs[Reg++];
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
    }
  }
}

SmallVector<std::pair<unsigned, unsigned>, 4>
RegsForValue::getRegsAndSizes() const {
  SmallVector<std::pair<unsigned, unsigned>, 4> OutVec;
  unsigned I = 0;
  for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
    unsigned RegCount = std::get<0>(CountAndVT);
    MVT RegisterVT = std::get<1>(CountAndVT);
    unsigned RegisterSize = RegisterVT.getSizeInBits();
    for (unsigned E = I + RegCount; I != E; ++I)
      OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
  }
  return OutVec;
}

void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
                               const TargetLibraryInfo *li) {
  AA = aa;
  GFI = gfi;
  LibInfo = li;
  DL = &DAG.getDataLayout();
  Context = DAG.getContext();
  LPadToCallSiteMap.clear();
}

void SelectionDAGBuilder::clear() {
  NodeMap.clear();
  UnusedArgNodeMap.clear();
  PendingLoads.clear();
  PendingExports.clear();
  CurInst = nullptr;
  HasTailCall = false;
  SDNodeOrder = LowestSDNodeOrder;
  StatepointLowering.clear();
}

void SelectionDAGBuilder::clearDanglingDebugInfo() {
  DanglingDebugInfoMap.clear();
}

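// getRoot - Return the current virtual root of the Selection DAG, flushing
// any PendingLoads. Multiple loads are flushed as a single TokenFactor so
// independent loads remain unordered with respect to each other.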
SDValue SelectionDAGBuilder::getRoot() {
  if (PendingLoads.empty())
    return DAG.getRoot();

  if (PendingLoads.size() == 1) {
    SDValue Root = PendingLoads[0];
    DAG.setRoot(Root);
    PendingLoads.clear();
    return Root;
  }

  // Otherwise, we have to make a token factor node.
  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                             PendingLoads);
  PendingLoads.clear();
  DAG.setRoot(Root);
  return Root;
}

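// getControlRoot - Similar to getRoot, but instead of flushing all the
// PendingLoad items, flush all the PendingExports items. It is necessary to
// do this before emitting a terminator instruction.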
SDValue SelectionDAGBuilder::getControlRoot() {
  SDValue Root = DAG.getRoot();

  if (PendingExports.empty())
    return Root;

  // Turn all of the CopyToReg chains into one factored node.
  if (Root.getOpcode() != ISD::EntryToken) {
    unsigned i = 0, e = PendingExports.size();
    for (; i != e; ++i) {
      assert(PendingExports[i].getNode()->getNumOperands() > 1);
      if (PendingExports[i].getNode()->getOperand(0) == Root)
        break;  // Don't add the root if we already indirectly depend on it.
    }

    if (i == e)
      PendingExports.push_back(Root);
  }

  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                     PendingExports);
  PendingExports.clear();
  DAG.setRoot(Root);
  return Root;
}

void SelectionDAGBuilder::visit(const Instruction &I) {
  // Set up outgoing PHI node register values before emitting the terminator.
  if (I.isTerminator()) {
    HandlePHINodesInSuccessorBlocks(I.getParent());
  }

  // Increase the SDNodeOrder if dealing with a non-debug instruction.
  if (!isa<DbgInfoIntrinsic>(I))
    ++SDNodeOrder;

  CurInst = &I;

  visit(I.getOpcode(), I);

  if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
    // Propagate the fast-math-flags of this IR instruction to the DAG node that
    // maps to this instruction.
    // TODO: We could handle all flags (nsw, etc) here.
    // TODO: If an IR instruction maps to >1 node, only the final node will have
    //       flags set.
    if (SDNode *Node = getNodeForIRValue(&I)) {
      SDNodeFlags IncomingFlags;
      IncomingFlags.copyFMF(*FPMO);
      if (!Node->getFlags().isDefined())
        Node->setFlags(IncomingFlags);
      else
        Node->intersectFlagsWith(IncomingFlags);
    }
  }

  if (!I.isTerminator() && !HasTailCall &&
      !isStatepoint(&I)) // statepoints handle their exports internally
    CopyToExportRegsIfNeeded(&I);

  CurInst = nullptr;
}

void SelectionDAGBuilder::visitPHI(const PHINode &) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}

void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
  // Note: this doesn't use InstVisitor, because it has to work with
  // ConstantExpr's in addition to instructions.
  switch (Opcode) {
  default: llvm_unreachable("Unknown instruction type encountered!");
    // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
  }
}

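// Drop any dangling debug info for the given variable whose fragment overlaps
// Expr: a newer dbg_value for that variable supersedes the pending location.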
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
                                                const DIExpression *Expr) {
  auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
    const DbgValueInst *DI = DDI.getDI();
    DIVariable *DanglingVariable = DI->getVariable();
    DIExpression *DanglingExpr = DI->getExpression();
    if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
      LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
      return true;
    }
    return false;
  };

  for (auto &DDIMI : DanglingDebugInfoMap) {
    DanglingDebugInfoVector &DDIV = DDIMI.second;
    DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
  }
}

// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                                                   SDValue Val) {
  auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
  if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
    return;

  DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
  for (auto &DDI : DDIV) {
    const DbgValueInst *DI = DDI.getDI();
    assert(DI && "Ill-formed DanglingDebugInfo");
    DebugLoc dl = DDI.getdl();
    unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
    DILocalVariable *Variable = DI->getVariable();
    DIExpression *Expr = DI->getExpression();
    assert(Variable->isValidLocationForIntrinsic(dl) &&
           "Expected inlined-at fields to agree");
    SDDbgValue *SDV;
    if (Val.getNode()) {
      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
        LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
                          << DbgSDNodeOrder << "] for:\n  " << *DI << "\n");
        LLVM_DEBUG(dbgs() << "  By mapping to:\n    "; Val.dump());
        // Increase the SDNodeOrder for the DbgValue here to make sure it is
        // inserted after the definition of Val when emitting the instructions
        // after ISel. An alternative could be to teach
        // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
        LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
                   << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
                   << ValSDNodeOrder << "\n");
        SDV = getDbgValue(Val, Variable, Expr, dl,
                          std::max(DbgSDNodeOrder, ValSDNodeOrder));
        DAG.AddDbgValue(SDV, Val.getNode(), false);
      } else
        LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
                          << "in EmitFuncArgumentDbgValue\n");
    } else
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
  }
  DDIV.clear();
}

/// getCopyFromRegs - If there was a virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return an empty SDValue()
/// otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
  SDValue Result;

  if (It != FuncInfo.ValueMap.end()) {
    unsigned InReg = It->second;

    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                     DAG.getDataLayout(), InReg, Ty,
                     None); // This is not an ABI copy.
    SDValue Chain = DAG.getEntryNode();
    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
                                 V);
    resolveDanglingDebugInfo(V, Result);
  }

  return Result;
}
1202 :
1203 : /// getValue - Return an SDValue for the given Value.
1204 16097274 : SDValue SelectionDAGBuilder::getValue(const Value *V) {
1205 : // If we already have an SDValue for this value, use it. It's important
1206 : // to do this first, so that we don't create a CopyFromReg if we already
1207 : // have a regular SDValue.
1208 16097274 : SDValue &N = NodeMap[V];
1209 16097274 : if (N.getNode()) return N;
1210 :
1211 : // If there's a virtual register allocated and initialized for this
1212 : // value, use it.
1213 6427784 : if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
1214 563647 : return copyFromReg;
1215 :
1216 : // Otherwise create a new SDValue and remember it.
1217 5864137 : SDValue Val = getValueImpl(V);
1218 5864137 : NodeMap[V] = Val;
1219 5864137 : resolveDanglingDebugInfo(V, Val);
1220 5864137 : return Val;
1221 : }
1222 :
1223 : // Return true if SDValue exists for the given Value
1224 459 : bool SelectionDAGBuilder::findValue(const Value *V) const {
1225 459 : return (NodeMap.find(V) != NodeMap.end()) ||
1226 106 : (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
1227 : }
1228 :
1229 : /// getNonRegisterValue - Return an SDValue for the given Value, but
1230 : /// don't look in FuncInfo.ValueMap for a virtual register.
1231 856920 : SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
1232 : // If we already have an SDValue for this value, use it.
1233 856920 : SDValue &N = NodeMap[V];
1234 856920 : if (N.getNode()) {
1235 : if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
1236 : // Remove the debug location from the node as the node is about to be used
1237 : // in a location which may differ from the original debug location. This
1238 : // is relevant to Constant and ConstantFP nodes because they can appear
1239 : // as constant expressions inside PHI nodes.
1240 27030 : N->setDebugLoc(DebugLoc());
1241 : }
1242 757583 : return N;
1243 : }
1244 :
1245 : // Otherwise create a new SDValue and remember it.
1246 99337 : SDValue Val = getValueImpl(V);
1247 99337 : NodeMap[V] = Val;
1248 99337 : resolveDanglingDebugInfo(V, Val);
1249 99337 : return Val;
1250 : }
1251 :
1252 : /// getValueImpl - Helper function for getValue and getNonRegisterValue.
1253 : /// Create an SDValue for the given value.
1254 5963474 : SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
1255 5963474 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1256 :
1257 5963474 : if (const Constant *C = dyn_cast<Constant>(V)) {
1258 3436762 : EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
1259 :
1260 : if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
1261 3219191 : return DAG.getConstant(*CI, getCurSDLoc(), VT);
1262 :
1263 : if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
1264 4324826 : return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
1265 :
1266 904158 : if (isa<ConstantPointerNull>(C)) {
1267 65286 : unsigned AS = V->getType()->getPointerAddressSpace();
1268 65286 : return DAG.getConstant(0, getCurSDLoc(),
1269 195858 : TLI.getPointerTy(DAG.getDataLayout(), AS));
1270 : }
1271 :
1272 : if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
1273 66161 : return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
1274 :
1275 816750 : if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
1276 30823 : return DAG.getUNDEF(VT);
1277 :
1278 : if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
1279 1379728 : visit(CE->getOpcode(), *CE);
1280 689864 : SDValue N1 = NodeMap[V];
1281 : assert(N1.getNode() && "visit didn't populate the NodeMap!");
1282 689864 : return N1;
1283 : }
1284 :
1285 96063 : if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
1286 : SmallVector<SDValue, 4> Constants;
1287 4411 : for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
1288 4411 : OI != OE; ++OI) {
1289 2951 : SDNode *Val = getValue(*OI).getNode();
1290 : // If the operand is an empty aggregate, there are no values.
1291 2951 : if (!Val) continue;
1292 : // Add each leaf value from the operand to the Constants list
1293 : // to form a flattened list of all the values.
1294 5907 : for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1295 2957 : Constants.push_back(SDValue(Val, i));
1296 : }
1297 :
1298 2928 : return DAG.getMergeValues(Constants, getCurSDLoc());
1299 : }
1300 :
1301 : if (const ConstantDataSequential *CDS =
1302 : dyn_cast<ConstantDataSequential>(C)) {
1303 : SmallVector<SDValue, 4> Ops;
1304 341290 : for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1305 274839 : SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
1306 : // Add each leaf value from the operand to the Constants list
1307 : // to form a flattened list of all the values.
1308 549678 : for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1309 274839 : Ops.push_back(SDValue(Val, i));
1310 : }
1311 :
1312 66451 : if (isa<ArrayType>(CDS->getType()))
1313 6 : return DAG.getMergeValues(Ops, getCurSDLoc());
1314 246829 : return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1315 : }
1316 :
1317 56304 : if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
1318 : assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1319 : "Unknown struct or array constant!");
1320 :
1321 : SmallVector<EVT, 4> ValueVTs;
1322 9009 : ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
1323 9009 : unsigned NumElts = ValueVTs.size();
1324 9009 : if (NumElts == 0)
1325 1 : return SDValue(); // empty struct
1326 9008 : SmallVector<SDValue, 4> Constants(NumElts);
1327 27920 : for (unsigned i = 0; i != NumElts; ++i) {
1328 37824 : EVT EltVT = ValueVTs[i];
1329 18912 : if (isa<UndefValue>(C))
1330 18478 : Constants[i] = DAG.getUNDEF(EltVT);
1331 434 : else if (EltVT.isFloatingPoint())
1332 60 : Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1333 : else
1334 810 : Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
1335 : }
1336 :
1337 18104 : return DAG.getMergeValues(Constants, getCurSDLoc());
1338 : }
1339 :
1340 : if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
1341 214 : return DAG.getBlockAddress(BA, VT);
1342 :
1343 18929 : VectorType *VecTy = cast<VectorType>(V->getType());
1344 18929 : unsigned NumElements = VecTy->getNumElements();
1345 :
1346 : // Now that we know the number and type of the elements, get that number of
1347 : // elements into the Ops array based on what kind of constant it is.
1348 : SmallVector<SDValue, 16> Ops;
1349 : if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
1350 22649 : for (unsigned i = 0; i != NumElements; ++i)
1351 20477 : Ops.push_back(getValue(CV->getOperand(i)));
1352 : } else {
1353 : assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
1354 : EVT EltVT =
1355 16757 : TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
1356 :
1357 16757 : SDValue Op;
1358 16757 : if (EltVT.isFloatingPoint())
1359 7843 : Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1360 : else
1361 25592 : Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
1362 16757 : Ops.assign(NumElements, Op);
1363 : }
1364 :
1365 : // Create a BUILD_VECTOR node.
1366 56931 : return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1367 : }
1368 :
1369 : // If this is a static alloca, generate it as the frameindex instead of
1370 : // computation.
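: // For example (a sketch): for entry-block IR like "%buf = alloca [16 x i8]",
: // FuncInfo.StaticAllocaMap already holds a frame slot, so the value lowers
: // directly to a FrameIndex node with no address computation.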
1371 : if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
1372 : DenseMap<const AllocaInst*, int>::iterator SI =
1373 2507219 : FuncInfo.StaticAllocaMap.find(AI);
1374 5014438 : if (SI != FuncInfo.StaticAllocaMap.end())
1375 2507219 : return DAG.getFrameIndex(SI->second,
1376 2507219 : TLI.getFrameIndexTy(DAG.getDataLayout()));
1377 : }
1378 :
1379 : // If this is an instruction which fast-isel has deferred, select it now.
1380 19493 : if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
1381 19493 : unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
1382 :
1383 38986 : RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
1384 19493 : Inst->getType(), getABIRegCopyCC(V));
1385 19493 : SDValue Chain = DAG.getEntryNode();
1386 58479 : return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
1387 : }
1388 :
1389 0 : llvm_unreachable("Can't get register for value!");
1390 : }
1391 :
1392 115 : void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
1393 115 : auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1394 115 : bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
1395 115 : bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
1396 : bool IsSEH = isAsynchronousEHPersonality(Pers);
1397 : bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
1398 115 : MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
1399 115 : if (!IsSEH)
1400 : CatchPadMBB->setIsEHScopeEntry();
1401 : // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
1402 115 : if (IsMSVCCXX || IsCoreCLR)
1403 : CatchPadMBB->setIsEHFuncletEntry();
1404 : // Wasm does not need catchpads anymore
1405 115 : if (!IsWasmCXX)
1406 107 : DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
1407 321 : getControlRoot()));
1408 115 : }
1409 :
1410 99 : void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
1411 : // Update machine-CFG edge.
1412 99 : MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
1413 99 : FuncInfo.MBB->addSuccessor(TargetMBB);
1414 :
1415 99 : auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1416 : bool IsSEH = isAsynchronousEHPersonality(Pers);
1417 : if (IsSEH) {
1418 : // If this is not a fall-through branch or optimizations are switched off,
1419 : // emit the branch.
1420 45 : if (TargetMBB != NextBlock(FuncInfo.MBB) ||
1421 18 : TM.getOptLevel() == CodeGenOpt::None)
1422 20 : DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1423 30 : getControlRoot(), DAG.getBasicBlock(TargetMBB)));
1424 27 : return;
1425 : }
1426 :
1427 : // Figure out the funclet membership for the catchret's successor.
1428 : // This will be used by the FuncletLayout pass to determine how to order the
1429 : // BB's.
1430 : // A 'catchret' returns to the outer scope's color.
1431 : Value *ParentPad = I.getCatchSwitchParentPad();
1432 : const BasicBlock *SuccessorColor;
1433 72 : if (isa<ConstantTokenNone>(ParentPad))
1434 130 : SuccessorColor = &FuncInfo.Fn->getEntryBlock();
1435 : else
1436 7 : SuccessorColor = cast<Instruction>(ParentPad)->getParent();
1437 : assert(SuccessorColor && "No parent funclet for catchret!");
1438 72 : MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
1439 : assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
1440 :
1441 : // Create the terminator node.
1442 144 : SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1443 72 : getControlRoot(), DAG.getBasicBlock(TargetMBB),
1444 144 : DAG.getBasicBlock(SuccessorColorMBB));
1445 72 : DAG.setRoot(Ret);
1446 : }
1447 :
1448 50 : void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
1449 : // Don't emit any special code for the cleanuppad instruction. It just marks
1450 : // the start of an EH scope/funclet.
1451 50 : FuncInfo.MBB->setIsEHScopeEntry();
1452 50 : auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1453 50 : if (Pers != EHPersonality::Wasm_CXX) {
1454 40 : FuncInfo.MBB->setIsEHFuncletEntry();
1455 40 : FuncInfo.MBB->setIsCleanupFuncletEntry();
1456 : }
1457 50 : }
1458 :
1459 : /// When an invoke or a cleanupret unwinds to the next EH pad, there are
1460 : /// many places it could ultimately go. In the IR, we have a single unwind
1461 : /// destination, but in the machine CFG, we enumerate all the possible blocks.
1462 : /// This function skips over imaginary basic blocks that hold catchswitch
1463 : /// instructions, and finds all the "real" machine
1464 : /// basic block destinations. As those destinations may not be successors of
1465 : /// EHPadBB, here we also calculate the edge probability to those destinations.
1466 : /// The passed-in Prob is the edge probability to EHPadBB.
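: /// For example (a sketch): for "invoke ... unwind label %dispatch" where
: /// %dispatch contains "catchswitch within none [label %h1, label %h2]
: /// unwind to caller", UnwindDests receives the machine blocks for %h1 and
: /// %h2 rather than one for the imaginary %dispatch block.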
1467 497030 : static void findUnwindDestinations(
1468 : FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
1469 : BranchProbability Prob,
1470 : SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
1471 : &UnwindDests) {
1472 : EHPersonality Personality =
1473 497030 : classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1474 497030 : bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
1475 497030 : bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
1476 : bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
1477 : bool IsSEH = isAsynchronousEHPersonality(Personality);
1478 :
1479 497159 : while (EHPadBB) {
1480 497034 : const Instruction *Pad = EHPadBB->getFirstNonPHI();
1481 : BasicBlock *NewEHPadBB = nullptr;
1482 497034 : if (isa<LandingPadInst>(Pad)) {
1483 : // Stop on landingpads. They are not funclets.
1484 496827 : UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1485 496827 : break;
1486 207 : } else if (isa<CleanupPadInst>(Pad)) {
1487 : // Stop on cleanup pads. Cleanups are always funclet entries for all known
1488 : // personalities.
1489 78 : UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1490 78 : UnwindDests.back().first->setIsEHScopeEntry();
1491 78 : if (!IsWasmCXX)
1492 64 : UnwindDests.back().first->setIsEHFuncletEntry();
1493 : break;
1494 : } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
1495 : // Add the catchpad handlers to the possible destinations.
1496 269 : for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
1497 140 : UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
1498 : // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
1499 140 : if (IsMSVCCXX || IsCoreCLR)
1500 92 : UnwindDests.back().first->setIsEHFuncletEntry();
1501 140 : if (!IsSEH)
1502 101 : UnwindDests.back().first->setIsEHScopeEntry();
1503 : }
1504 : NewEHPadBB = CatchSwitch->getUnwindDest();
1505 : } else {
1506 : continue;
1507 : }
1508 :
1509 129 : BranchProbabilityInfo *BPI = FuncInfo.BPI;
1510 129 : if (BPI && NewEHPadBB)
1511 31 : Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
1512 129 : EHPadBB = NewEHPadBB;
1513 : }
1514 497030 : }
1515 :
1516 39 : void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
1517 : // Update successor info.
1518 : SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
1519 : auto UnwindDest = I.getUnwindDest();
1520 39 : BranchProbabilityInfo *BPI = FuncInfo.BPI;
1521 : BranchProbability UnwindDestProb =
1522 39 : (BPI && UnwindDest)
1523 12 : ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
1524 39 : : BranchProbability::getZero();
1525 39 : findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
1526 53 : for (auto &UnwindDest : UnwindDests) {
1527 14 : UnwindDest.first->setIsEHPad();
1528 14 : addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
1529 : }
1530 39 : FuncInfo.MBB->normalizeSuccProbs();
1531 :
1532 : // Create the terminator node.
1533 : SDValue Ret =
1534 117 : DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
1535 39 : DAG.setRoot(Ret);
1536 39 : }
1537 :
1538 0 : void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
1539 0 : report_fatal_error("visitCatchSwitch not yet implemented!");
1540 : }
1541 :
1542 194288 : void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1543 194288 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1544 194288 : auto &DL = DAG.getDataLayout();
1545 194288 : SDValue Chain = getControlRoot();
1546 : SmallVector<ISD::OutputArg, 8> Outs;
1547 : SmallVector<SDValue, 8> OutVals;
1548 :
1549 : // Calls to @llvm.experimental.deoptimize don't generate a return value, so
1550 : // lower
1551 : //
1552 : // %val = call <ty> @llvm.experimental.deoptimize()
1553 : // ret <ty> %val
1554 : //
1555 : // differently.
1556 194288 : if (I.getParent()->getTerminatingDeoptimizeCall()) {
1557 0 : LowerDeoptimizingReturn();
1558 : return;
1559 : }
1560 :
1561 194288 : if (!FuncInfo.CanLowerReturn) {
1562 1354 : unsigned DemoteReg = FuncInfo.DemoteRegister;
1563 1354 : const Function *F = I.getParent()->getParent();
1564 :
1565 : // Emit a store of the return value through the virtual register.
1566 : // Leave Outs empty so that LowerReturn won't try to load return
1567 : // registers the usual way.
1568 : SmallVector<EVT, 1> PtrValueVTs;
1569 1354 : ComputeValueVTs(TLI, DL,
1570 1354 : F->getReturnType()->getPointerTo(
1571 1354 : DAG.getDataLayout().getAllocaAddrSpace()),
1572 : PtrValueVTs);
1573 :
1574 5416 : SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
1575 1354 : DemoteReg, PtrValueVTs[0]);
1576 1354 : SDValue RetOp = getValue(I.getOperand(0));
1577 :
1578 : SmallVector<EVT, 4> ValueVTs;
1579 : SmallVector<uint64_t, 4> Offsets;
1580 1354 : ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1581 1354 : unsigned NumValues = ValueVTs.size();
1582 :
1583 1354 : SmallVector<SDValue, 4> Chains(NumValues);
1584 3030 : for (unsigned i = 0; i != NumValues; ++i) {
1585 : // An aggregate return value cannot wrap around the address space, so
1586 : // offsets to its parts don't wrap either.
1587 5028 : SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
1588 3352 : Chains[i] = DAG.getStore(
1589 1676 : Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1590 : // FIXME: better loc info would be nice.
1591 5028 : Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
1592 : }
1593 :
1594 4062 : Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
1595 2708 : MVT::Other, Chains);
1596 192934 : } else if (I.getNumOperands() != 0) {
1597 : SmallVector<EVT, 4> ValueVTs;
1598 136110 : ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
1599 136110 : unsigned NumValues = ValueVTs.size();
1600 136110 : if (NumValues) {
1601 136109 : SDValue RetOp = getValue(I.getOperand(0));
1602 :
1603 136109 : const Function *F = I.getParent()->getParent();
1604 :
1605 : ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1606 136109 : if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1607 : Attribute::SExt))
1608 : ExtendKind = ISD::SIGN_EXTEND;
1609 132265 : else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1610 : Attribute::ZExt))
1611 : ExtendKind = ISD::ZERO_EXTEND;
1612 :
1613 136109 : LLVMContext &Context = F->getContext();
1614 136109 : bool RetInReg = F->getAttributes().hasAttribute(
1615 : AttributeList::ReturnIndex, Attribute::InReg);
1616 :
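: // For illustration (a sketch): returning an i64 under a calling convention
: // where only i32 registers are legal gives NumParts == 2 with
: // PartVT == MVT::i32, so two OutputArg/OutVals entries are pushed for the
: // single IR-level return value.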
1617 276602 : for (unsigned j = 0; j != NumValues; ++j) {
1618 140493 : EVT VT = ValueVTs[j];
1619 :
1620 140493 : if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
1621 7781 : VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
1622 :
1623 : CallingConv::ID CC = F->getCallingConv();
1624 :
1625 140493 : unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
1626 140493 : MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
1627 140493 : SmallVector<SDValue, 4> Parts(NumParts);
1628 421479 : getCopyToParts(DAG, getCurSDLoc(),
1629 140493 : SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1630 : &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
1631 :
1632 : // 'inreg' on function refers to return value
1633 : ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1634 140493 : if (RetInReg)
1635 : Flags.setInReg();
1636 :
1637 : // Propagate extension type if any
1638 140493 : if (ExtendKind == ISD::SIGN_EXTEND)
1639 : Flags.setSExt();
1640 136649 : else if (ExtendKind == ISD::ZERO_EXTEND)
1641 : Flags.setZExt();
1642 :
1643 293793 : for (unsigned i = 0; i < NumParts; ++i) {
1644 459900 : Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1645 : VT, /*isfixed=*/true, 0, 0));
1646 153300 : OutVals.push_back(Parts[i]);
1647 : }
1648 : }
1649 : }
1650 : }
1651 :
1652 : // Push the swifterror virtual register as the last element of Outs. This
1653 : // makes sure the swifterror virtual register will be returned in the
1654 : // swifterror physical register.
1655 194288 : const Function *F = I.getParent()->getParent();
1656 314454 : if (TLI.supportSwiftError() &&
1657 314454 : F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
1658 : assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
1659 : ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1660 : Flags.setSwiftError();
1661 114 : Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
1662 : EVT(TLI.getPointerTy(DL)) /*argvt*/,
1663 : true /*isfixed*/, 1 /*origidx*/,
1664 : 0 /*partOffs*/));
1665 : // Create SDNode for the swifterror virtual register.
1666 114 : OutVals.push_back(
1667 114 : DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
1668 114 : &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
1669 228 : EVT(TLI.getPointerTy(DL))));
1670 : }
1671 :
1672 194288 : bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
1673 : CallingConv::ID CallConv =
1674 : DAG.getMachineFunction().getFunction().getCallingConv();
1675 194288 : Chain = DAG.getTargetLoweringInfo().LowerReturn(
1676 388575 : Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
1677 :
1678 : // Verify that the target's LowerReturn behaved as expected.
1679 : assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1680 : "LowerReturn didn't return a valid chain!");
1681 :
1682 : // Update the DAG with the new chain value resulting from return lowering.
1683 194288 : DAG.setRoot(Chain);
1684 : }
1685 :
1686 : /// CopyToExportRegsIfNeeded - If the given value has virtual registers
1687 : /// created for it, emit nodes to copy the value into the virtual
1688 : /// registers.
1689 11876238 : void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1690 : // Skip empty types
1691 11876238 : if (V->getType()->isEmptyTy())
1692 16 : return;
1693 :
1694 11876222 : DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1695 23752444 : if (VMI != FuncInfo.ValueMap.end()) {
1696 : assert(!V->use_empty() && "Unused value assigned virtual registers!");
1697 750065 : CopyValueToVirtualRegister(V, VMI->second);
1698 : }
1699 : }
1700 :
1701 : /// ExportFromCurrentBlock - If this condition isn't known to be exported from
1702 : /// the current basic block, add it to ValueMap now so that we'll get a
1703 : /// CopyTo/FromReg.
1704 12308 : void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1705 : // No need to export constants.
1706 12308 : if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1707 :
1708 : // Already exported?
1709 17122 : if (FuncInfo.isExportedInst(V)) return;
1710 :
1711 5848 : unsigned Reg = FuncInfo.InitializeRegForValue(V);
1712 5848 : CopyValueToVirtualRegister(V, Reg);
1713 : }
1714 :
1715 1340 : bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1716 : const BasicBlock *FromBB) {
1717 : // The operands of the setcc have to be in this block. We don't know
1718 : // how to export them from some other block.
1719 : if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1720 : // Can export from current BB.
1721 678 : if (VI->getParent() == FromBB)
1722 : return true;
1723 :
1724 : // Is already exported, noop.
1725 354 : return FuncInfo.isExportedInst(V);
1726 : }
1727 :
1728 : // If this is an argument, we can export it if the BB is the entry block or
1729 : // if it is already exported.
1730 662 : if (isa<Argument>(V)) {
1731 358 : if (FromBB == &FromBB->getParent()->getEntryBlock())
1732 : return true;
1733 :
1734 : // Otherwise, can only export this if it is already exported.
1735 202 : return FuncInfo.isExportedInst(V);
1736 : }
1737 :
1738 : // Otherwise, constants can always be exported.
1739 : return true;
1740 : }
1741 :
1742 : /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1743 : BranchProbability
1744 184359 : SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1745 : const MachineBasicBlock *Dst) const {
1746 184359 : BranchProbabilityInfo *BPI = FuncInfo.BPI;
1747 184359 : const BasicBlock *SrcBB = Src->getBasicBlock();
1748 184359 : const BasicBlock *DstBB = Dst->getBasicBlock();
1749 184359 : if (!BPI) {
1750 : // If BPI is not available, set the default probability as 1 / N, where N is
1751 : // the number of successors.
1752 10458 : auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
1753 10458 : return BranchProbability(1, SuccSize);
1754 : }
1755 173901 : return BPI->getEdgeProbability(SrcBB, DstBB);
1756 : }
1757 :
1758 1189728 : void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
1759 : MachineBasicBlock *Dst,
1760 : BranchProbability Prob) {
1761 1189728 : if (!FuncInfo.BPI)
1762 965516 : Src->addSuccessorWithoutProb(Dst);
1763 : else {
1764 224212 : if (Prob.isUnknown())
1765 171490 : Prob = getEdgeProbability(Src, Dst);
1766 224212 : Src->addSuccessor(Dst, Prob);
1767 : }
1768 1189728 : }
1769 :
1770 : static bool InBlock(const Value *V, const BasicBlock *BB) {
1771 : if (const Instruction *I = dyn_cast<Instruction>(V))
1772 1504 : return I->getParent() == BB;
1773 : return true;
1774 : }
1775 :
1776 : /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1777 : /// This function emits a branch and is used at the leaves of an OR or an
1778 : /// AND operator tree.
1779 : void
1780 1494 : SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1781 : MachineBasicBlock *TBB,
1782 : MachineBasicBlock *FBB,
1783 : MachineBasicBlock *CurBB,
1784 : MachineBasicBlock *SwitchBB,
1785 : BranchProbability TProb,
1786 : BranchProbability FProb,
1787 : bool InvertCond) {
1788 1494 : const BasicBlock *BB = CurBB->getBasicBlock();
1789 :
1790 : // If the leaf of the tree is a comparison, merge the condition into
1791 : // the caseblock.
1792 : if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1793 : // The operands of the cmp have to be in this block. We don't know
1794 : // how to export them from some other block. If this is the first block
1795 : // of the sequence, no exporting is needed.
1796 2022 : if (CurBB == SwitchBB ||
1797 1340 : (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1798 664 : isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1799 : ISD::CondCode Condition;
1800 : if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1801 : ICmpInst::Predicate Pred =
1802 1287 : InvertCond ? IC->getInversePredicate() : IC->getPredicate();
1803 1287 : Condition = getICmpCondCode(Pred);
1804 : } else {
1805 : const FCmpInst *FC = cast<FCmpInst>(Cond);
1806 : FCmpInst::Predicate Pred =
1807 47 : InvertCond ? FC->getInversePredicate() : FC->getPredicate();
1808 47 : Condition = getFCmpCondCode(Pred);
1809 47 : if (TM.Options.NoNaNsFPMath)
1810 0 : Condition = getFCmpCodeWithoutNaN(Condition);
1811 : }
1812 :
1813 : CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1814 1334 : TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1815 1334 : SwitchCases.push_back(CB);
1816 : return;
1817 : }
1818 : }
1819 :
1820 : // Create a CaseBlock record representing this branch.
1821 160 : ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
1822 160 : CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
1823 160 : nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1824 160 : SwitchCases.push_back(CB);
1825 : }
1826 :
1827 : /// FindMergedConditions - If Cond is an expression like (X && Y) or
: /// (X || Y), recursively decompose it and emit the leaf comparisons as a
: /// sequence of conditional branches (CaseBlocks) instead of a single setcc.
1828 2293 : void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1829 : MachineBasicBlock *TBB,
1830 : MachineBasicBlock *FBB,
1831 : MachineBasicBlock *CurBB,
1832 : MachineBasicBlock *SwitchBB,
1833 : Instruction::BinaryOps Opc,
1834 : BranchProbability TProb,
1835 : BranchProbability FProb,
1836 : bool InvertCond) {
1837 : // Skip over not part of the tree and remember to invert op and operands at
1838 : // next level.
1839 2328 : if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
1840 32 : const Value *CondOp = BinaryOperator::getNotArgument(Cond);
1841 32 : if (InBlock(CondOp, CurBB->getBasicBlock())) {
1842 30 : FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
1843 30 : !InvertCond);
1844 30 : return;
1845 : }
1846 : }
1847 :
1848 : const Instruction *BOp = dyn_cast<Instruction>(Cond);
1849 : // Compute the effective opcode for Cond, taking into account whether it needs
1850 : // to be inverted, e.g.
1851 : // and (not (or A, B)), C
1852 : // gets lowered as
1853 : // and (and (not A, not B), C)
1854 : unsigned BOpc = 0;
1855 : if (BOp) {
1856 : BOpc = BOp->getOpcode();
1857 2180 : if (InvertCond) {
1858 41 : if (BOpc == Instruction::And)
1859 : BOpc = Instruction::Or;
1860 37 : else if (BOpc == Instruction::Or)
1861 : BOpc = Instruction::And;
1862 : }
1863 : }
1864 :
1865 : // If this node is not part of the or/and tree, emit it as a branch.
1866 3526 : if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1867 784 : BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
1868 1533 : BOp->getParent() != CurBB->getBasicBlock() ||
1869 3769 : !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1870 : !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1871 1494 : EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
1872 : TProb, FProb, InvertCond);
1873 1494 : return;
1874 : }
1875 :
1876 : // Create TmpBB after CurBB.
1877 : MachineFunction::iterator BBI(CurBB);
1878 769 : MachineFunction &MF = DAG.getMachineFunction();
1879 769 : MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1880 769 : CurBB->getParent()->insert(++BBI, TmpBB);
1881 :
1882 769 : if (Opc == Instruction::Or) {
1883 : // Codegen X | Y as:
1884 : // BB1:
1885 : // jmp_if_X TBB
1886 : // jmp TmpBB
1887 : // TmpBB:
1888 : // jmp_if_Y TBB
1889 : // jmp FBB
1890 : //
1891 :
1892 : // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1893 : // The requirement is that
1894 : // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1895 : // = TrueProb for original BB.
1896 : // Assuming the original probabilities are A and B, one choice is to set
1897 : // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
1898 : // A/(1+B) and 2B/(1+B). This choice assumes that
1899 : // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1900 : // Another choice is to assume TrueProb for BB1 equals TrueProb for
1901 : // TmpBB, but the math is more complicated.
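: // For example (a worked sketch with A = B = 1/2): BB1 branches to TBB with
: // A/2 = 1/4 and to TmpBB with A/2 + B = 3/4, and TmpBB's raw pair
: // {A/2, B} = {1/4, 1/2} normalizes to {1/3, 2/3}; the requirement checks
: // out, since 1/4 + 3/4 * 1/3 = 1/2 = A.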
1902 :
1903 316 : auto NewTrueProb = TProb / 2;
1904 316 : auto NewFalseProb = TProb / 2 + FProb;
1905 : // Emit the LHS condition.
1906 632 : FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
1907 : NewTrueProb, NewFalseProb, InvertCond);
1908 :
1909 : // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
1910 316 : SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
1911 316 : BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1912 : // Emit the RHS condition into TmpBB.
1913 316 : FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1914 : Probs[0], Probs[1], InvertCond);
1915 : } else {
1916 : assert(Opc == Instruction::And && "Unknown merge op!");
1917 : // Codegen X & Y as:
1918 : // BB1:
1919 : // jmp_if_X TmpBB
1920 : // jmp FBB
1921 : // TmpBB:
1922 : // jmp_if_Y TBB
1923 : // jmp FBB
1924 : //
1925 : // This requires creation of TmpBB after CurBB.
1926 :
1927 : // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1928 : // The requirement is that
1929 : // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1930 : // = FalseProb for original BB.
1931 : // Assuming the original probabilities are A and B, one choice is to set
1932 : // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
1933 : // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
1934 : // TrueProb for BB1 * FalseProb for TmpBB.
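: // For example (a worked sketch with A = B = 1/2): BB1 branches to TmpBB
: // with A + B/2 = 3/4 and to FBB with B/2 = 1/4, and TmpBB's raw pair
: // {A, B/2} = {1/2, 1/4} normalizes to {2/3, 1/3}; the requirement checks
: // out, since 1/4 + 3/4 * 1/3 = 1/2 = B.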
1935 :
1936 453 : auto NewTrueProb = TProb + FProb / 2;
1937 453 : auto NewFalseProb = FProb / 2;
1938 : // Emit the LHS condition.
1939 906 : FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
1940 : NewTrueProb, NewFalseProb, InvertCond);
1941 :
1942 : // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
1943 453 : SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
1944 453 : BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1945 : // Emit the RHS condition into TmpBB.
1946 453 : FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1947 : Probs[0], Probs[1], InvertCond);
1948 : }
1949 : }
1950 :
1951 : /// If the set of cases should be emitted as a series of branches, return true.
1952 : /// If we should emit this as a bunch of and/or'd together conditions, return
1953 : /// false.
1954 : bool
1955 725 : SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
1956 1450 : if (Cases.size() != 2) return true;
1957 :
1958 : // If these are two comparisons of the same values or'd or and'd together,
1959 : // they will get folded into a single comparison, so don't emit two blocks.
1960 711 : if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1961 681 : Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1962 672 : (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1963 2 : Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1964 : return false;
1965 : }
1966 :
1967 : // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1968 : // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1969 785 : if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1970 113 : Cases[0].CC == Cases[1].CC &&
1971 757 : isa<Constant>(Cases[0].CmpRHS) &&
1972 85 : cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1973 44 : if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1974 : return false;
1975 42 : if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1976 3 : return false;
1977 : }
1978 :
1979 : return true;
1980 : }
1981 :
1982 158770 : void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1983 158770 : MachineBasicBlock *BrMBB = FuncInfo.MBB;
1984 :
1985 : // Update machine-CFG edges.
1986 158770 : MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1987 :
1988 158770 : if (I.isUnconditional()) {
1989 : // Update machine-CFG edges.
1990 92873 : BrMBB->addSuccessor(Succ0MBB);
1991 :
1992 : // If this is not a fall-through branch or optimizations are switched off,
1993 : // emit the branch.
1994 92873 : if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
1995 106340 : DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
1996 : MVT::Other, getControlRoot(),
1997 159510 : DAG.getBasicBlock(Succ0MBB)));
1998 :
1999 93584 : return;
2000 : }
2001 :
2002 : // If this condition is one of the special cases we handle, do special stuff
2003 : // now.
2004 : const Value *CondVal = I.getCondition();
2005 65897 : MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
2006 :
2007 : // If this is a series of conditions that are or'd or and'd together, emit
2008 : // this as a sequence of branches instead of setcc's with and/or operations.
2009 : // As long as jumps are not expensive, this should improve performance.
2010 : // For example, instead of something like:
2011 : // cmp A, B
2012 : // C = seteq
2013 : // cmp D, E
2014 : // F = setle
2015 : // or C, F
2016 : // jnz foo
2017 : // Emit:
2018 : // cmp A, B
2019 : // je foo
2020 : // cmp D, E
2021 : // jle foo
2022 : if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
2023 : Instruction::BinaryOps Opcode = BOp->getOpcode();
2024 1502 : if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
2025 1799 : !I.getMetadata(LLVMContext::MD_unpredictable) &&
2026 854 : (Opcode == Instruction::And || Opcode == Instruction::Or)) {
2027 725 : FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
2028 : Opcode,
2029 : getEdgeProbability(BrMBB, Succ0MBB),
2030 : getEdgeProbability(BrMBB, Succ1MBB),
2031 : /*InvertCond=*/false);
2032 : // If the compares in later blocks need to use values not currently
2033 : // exported from this block, export them now. This block should always
2034 : // be the first entry.
2035 : assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
2036 :
2037 : // Allow some cases to be rejected.
2038 725 : if (ShouldEmitAsBranches(SwitchCases)) {
2039 2177 : for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
2040 1510 : ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
2041 1510 : ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
2042 : }
2043 :
2044 : // Emit the branch for this block.
2045 711 : visitSwitchCase(SwitchCases[0], BrMBB);
2046 : SwitchCases.erase(SwitchCases.begin());
2047 711 : return;
2048 : }
2049 :
2050 : // Okay, we decided not to do this, remove any inserted MBB's and clear
2051 : // SwitchCases.
2052 42 : for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
2053 28 : FuncInfo.MF->erase(SwitchCases[i].ThisBB);
2054 :
2055 : SwitchCases.clear();
2056 : }
2057 : }
2058 :
2059 : // Create a CaseBlock record representing this branch.
2060 65186 : CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
2061 65186 : nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
2062 :
2063 : // Use visitSwitchCase to actually insert the fast branch sequence for this
2064 : // cond branch.
2065 65186 : visitSwitchCase(CB, BrMBB);
2066 : }
2067 :
2068 : /// visitSwitchCase - Emits the necessary code to represent a single node in
2069 : /// the binary search tree resulting from lowering a switch instruction.
2070 85593 : void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
2071 : MachineBasicBlock *SwitchBB) {
2072 : SDValue Cond;
2073 85593 : SDValue CondLHS = getValue(CB.CmpLHS);
2074 : SDLoc dl = CB.DL;
2075 :
2076 : // Build the setcc now.
2077 85593 : if (!CB.CmpMHS) {
2078 : // Fold "(X == true)" to X and "(X == false)" to !X to
2079 : // handle common cases produced by branch lowering.
2080 81902 : if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
2081 65343 : CB.CC == ISD::SETEQ)
2082 65334 : Cond = CondLHS;
2083 16568 : else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
2084 4 : CB.CC == ISD::SETEQ) {
2085 8 : SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
2086 8 : Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
2087 : } else
2088 16564 : Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
2089 : } else {
2090 : assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
2091 :
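: // A whole range [Low, High] is tested with a single unsigned comparison:
: // (x - Low) ule (High - Low). For example (a sketch), the range [10, 14]
: // becomes "x - 10 ule 4", which holds exactly for 10 <= x <= 14 because
: // any x below 10 wraps around to a huge unsigned value.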
2092 3691 : const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
2093 3691 : const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
2094 :
2095 3691 : SDValue CmpOp = getValue(CB.CmpMHS);
2096 3691 : EVT VT = CmpOp.getValueType();
2097 :
2098 3691 : if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
2099 2 : Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
2100 1 : ISD::SETLE);
2101 : } else {
2102 3690 : SDValue SUB = DAG.getNode(ISD::SUB, dl,
2103 3690 : VT, CmpOp, DAG.getConstant(Low, dl, VT));
2104 3690 : Cond = DAG.getSetCC(dl, MVT::i1, SUB,
2105 3690 : DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
2106 : }
2107 : }
2108 :
2109 : // Update successor info
2110 85593 : addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2111 : // TrueBB and FalseBB are always different unless the incoming IR is
2112 : // degenerate. This only happens when running llc on weird IR.
2113 85593 : if (CB.TrueBB != CB.FalseBB)
2114 85586 : addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
2115 : SwitchBB->normalizeSuccProbs();
2116 :
2117 : // If the lhs block is the next block, invert the condition so that we can
2118 : // fall through to the lhs instead of the rhs block.
2119 85593 : if (CB.TrueBB == NextBlock(SwitchBB)) {
2120 : std::swap(CB.TrueBB, CB.FalseBB);
2121 61346 : SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
2122 61346 : Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
2123 : }
2124 :
2125 85593 : SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2126 : MVT::Other, getControlRoot(), Cond,
2127 85593 : DAG.getBasicBlock(CB.TrueBB));
2128 :
2129 : // Insert the false branch. Do this even if it's a fall-through branch, as
2130 : // this makes it easier to do DAG optimizations which require inverting
2131 : // the branch condition.
2132 85593 : BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2133 85593 : DAG.getBasicBlock(CB.FalseBB));
2134 :
2135 85593 : DAG.setRoot(BrCond);
2136 85593 : }
2137 :
2138 : /// visitJumpTable - Emit JumpTable node in the current MBB
2139 3199 : void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
2140 : // Emit the code for the jump table
2141 : assert(JT.Reg != -1U && "Should lower JT Header first!");
2142 3199 : EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2143 9597 : SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2144 3199 : JT.Reg, PTy);
2145 3199 : SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
2146 6398 : SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2147 : MVT::Other, Index.getValue(1),
2148 3199 : Table, Index);
2149 3199 : DAG.setRoot(BrJumpTable);
2150 3199 : }
2151 :
2152 : /// visitJumpTableHeader - This function emits the necessary code to produce
2153 : /// an index into the jump table from the value of the switch case.
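: /// For example (a sketch): for cases spanning [5, 12] this emits
: /// Sub = x - 5, branches to the default block when Sub >u 7, and otherwise
: /// zero-extends or truncates Sub to the pointer type and copies it into the
: /// virtual register that the BR_JT in visitJumpTable will index with.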
2154 3199 : void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
2155 : JumpTableHeader &JTH,
2156 : MachineBasicBlock *SwitchBB) {
2157 3199 : SDLoc dl = getCurSDLoc();
2158 :
2159 : // Subtract the lowest switch case value from the value being switched on and
2160 : // conditionally branch to the default MBB if the result is greater than the
2161 : // difference between the smallest and largest cases.
2162 3199 : SDValue SwitchOp = getValue(JTH.SValue);
2163 3199 : EVT VT = SwitchOp.getValueType();
2164 3199 : SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2165 3199 : DAG.getConstant(JTH.First, dl, VT));
2166 :
2167 : // The SDNode we just created, which holds the value being switched on minus
2168 : // the smallest case value, needs to be copied to a virtual register so it
2169 : // can be used as an index into the jump table in a subsequent basic block.
2170 : // This value may be smaller or larger than the target's pointer type, and
2171 : // may therefore require extension or truncation.
2172 3199 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2173 6398 : SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2174 :
2175 : unsigned JumpTableReg =
2176 6398 : FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2177 3199 : SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2178 3199 : JumpTableReg, SwitchOp);
2179 3199 : JT.Reg = JumpTableReg;
2180 :
2181 : // Emit the range check for the jump table, and branch to the default block
2182 : // for the switch statement if the value being switched on exceeds the largest
2183 : // case in the switch.
2184 3199 : SDValue CMP = DAG.getSetCC(
2185 3199 : dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2186 3199 : Sub.getValueType()),
2187 9597 : Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
2188 :
2189 3199 : SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2190 : MVT::Other, CopyTo, CMP,
2191 3199 : DAG.getBasicBlock(JT.Default));
2192 :
2193 : // Avoid emitting unnecessary branches to the next block.
2194 3199 : if (JT.MBB != NextBlock(SwitchBB))
2195 30 : BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2196 30 : DAG.getBasicBlock(JT.MBB));
2197 :
2198 3199 : DAG.setRoot(BrCond);
2199 3199 : }
2200 :
2201 : /// Create a LOAD_STACK_GUARD node, and let it carry the target-specific
2202 : /// global variable (as a machine memory operand) if one exists.
2203 0 : static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2204 : SDValue &Chain) {
2205 0 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2206 0 : EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2207 0 : MachineFunction &MF = DAG.getMachineFunction();
2208 0 : Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
2209 : MachineSDNode *Node =
2210 0 : DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2211 0 : if (Global) {
2212 : MachinePointerInfo MPInfo(Global);
2213 : auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2214 : MachineMemOperand::MODereferenceable;
2215 0 : MachineMemOperand *MemRef = MF.getMachineMemOperand(
2216 0 : MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
2217 0 : DAG.setNodeMemRefs(Node, {MemRef});
2218 : }
2219 0 : return SDValue(Node, 0);
2220 : }
2221 :
2222 : /// Codegen a new tail for a stack protector check ParentMBB which has had its
2223 : /// tail spliced into a stack protector check success bb.
2224 : ///
2225 : /// For a high level explanation of how this fits into the stack protector
2226 : /// generation see the comment on the declaration of class
2227 : /// StackProtectorDescriptor.
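: /// In outline, the emitted tail looks like (a sketch of the inline-check
: /// case):
: ///   GuardVal = volatile load of the stack-slot canary (frame index FI)
: ///   Guard    = LOAD_STACK_GUARD or a volatile load of the guard variable
: ///   brcond (setne (sub Guard, GuardVal), 0), FailureMBB
: ///   br SuccessMBB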
2228 340 : void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2229 : MachineBasicBlock *ParentBB) {
2230 :
2231 : // First create the loads to the guard/stack slot for the comparison.
2232 340 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2233 340 : EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2234 :
2235 340 : MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2236 340 : int FI = MFI.getStackProtectorIndex();
2237 :
2238 : SDValue Guard;
2239 340 : SDLoc dl = getCurSDLoc();
2240 340 : SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2241 340 : const Module &M = *ParentBB->getParent()->getFunction().getParent();
2242 340 : unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
2243 :
2244 : // Generate code to load the content of the guard slot.
2245 340 : SDValue GuardVal = DAG.getLoad(
2246 340 : PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
2247 : MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
2248 340 : MachineMemOperand::MOVolatile);
2249 :
2250 340 : if (TLI.useStackGuardXorFP())
2251 145 : GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
2252 :
2253 : // Retrieve the guard check function; it is nullptr when the check is inlined.
2254 340 : if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
2255 : // The target provides a guard check function to validate the guard value.
2256 : // Generate a call to that function with the content of the guard slot as
2257 : // argument.
2258 : auto *Fn = cast<Function>(GuardCheck);
2259 : FunctionType *FnTy = Fn->getFunctionType();
2260 : assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2261 :
2262 : TargetLowering::ArgListTy Args;
2263 : TargetLowering::ArgListEntry Entry;
2264 82 : Entry.Node = GuardVal;
2265 164 : Entry.Ty = FnTy->getParamType(0);
2266 82 : if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
2267 82 : Entry.IsInReg = true;
2268 82 : Args.push_back(Entry);
2269 :
2270 164 : TargetLowering::CallLoweringInfo CLI(DAG);
2271 82 : CLI.setDebugLoc(getCurSDLoc())
2272 82 : .setChain(DAG.getEntryNode())
2273 : .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
2274 82 : getValue(GuardCheck), std::move(Args));
2275 :
2276 82 : std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2277 82 : DAG.setRoot(Result.second);
2278 : return;
2279 : }
2280 :
2281 : // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2282 : // Otherwise, emit a volatile load to retrieve the stack guard value.
2283 258 : SDValue Chain = DAG.getEntryNode();
2284 258 : if (TLI.useLoadStackGuardNode()) {
2285 143 : Guard = getLoadStackGuard(DAG, dl, Chain);
2286 : } else {
2287 115 : const Value *IRGuard = TLI.getSDagStackGuard(M);
2288 115 : SDValue GuardPtr = getValue(IRGuard);
2289 :
2290 115 : Guard =
2291 115 : DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
2292 115 : Align, MachineMemOperand::MOVolatile);
2293 : }
2294 :
2295 : // Perform the comparison via a subtract/getsetcc.
2296 258 : EVT VT = Guard.getValueType();
2297 516 : SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
2298 :
2299 258 : SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2300 258 : *DAG.getContext(),
2301 258 : Sub.getValueType()),
2302 258 : Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
2303 :
2304 : // If the sub is not 0, then we know the guard and stack slot are not equal,
2305 : // so branch to the failure MBB.
2306 258 : SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2307 : MVT::Other, GuardVal.getOperand(0),
2308 258 : Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2309 : // Otherwise branch to success MBB.
2310 258 : SDValue Br = DAG.getNode(ISD::BR, dl,
2311 : MVT::Other, BrCond,
2312 258 : DAG.getBasicBlock(SPD.getSuccessMBB()));
2313 :
2314 258 : DAG.setRoot(Br);
2315 : }
2316 :
2317 : /// Codegen the failure basic block for a stack protector check.
2318 : ///
2319 : /// A failure stack protector machine basic block consists simply of a call to
2320 : /// __stack_chk_fail().
2321 : ///
2322 : /// For a high level explanation of how this fits into the stack protector
2323 : /// generation see the comment on the declaration of class
2324 : /// StackProtectorDescriptor.
2325 : void
2326 249 : SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2327 249 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2328 : SDValue Chain =
2329 249 : TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2330 249 : None, false, getCurSDLoc(), false, false).second;
2331 249 : DAG.setRoot(Chain);
2332 249 : }
2333 :
2334 : /// visitBitTestHeader - This function emits necessary code to produce value
2335 : /// suitable for "bit tests"
2336 49 : void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
2337 : MachineBasicBlock *SwitchBB) {
2338 49 : SDLoc dl = getCurSDLoc();
2339 :
2340 : // Subtract the minimum value
2341 49 : SDValue SwitchOp = getValue(B.SValue);
2342 49 : EVT VT = SwitchOp.getValueType();
2343 49 : SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2344 49 : DAG.getConstant(B.First, dl, VT));
2345 :
2346 : // Check range
2347 49 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2348 : SDValue RangeCmp = DAG.getSetCC(
2349 49 : dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2350 49 : Sub.getValueType()),
2351 49 : Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
2352 :
2353 : // Determine the type of the test operands.
2354 : bool UsePtrType = false;
2355 : if (!TLI.isTypeLegal(VT))
2356 : UsePtrType = true;
2357 : else {
2358 97 : for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
2359 122 : if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
2360 : // Switch table case ranges are encoded into a series of masks.
2361 : // Just use the pointer type; it's guaranteed to fit.
2362 : UsePtrType = true;
2363 : break;
2364 : }
2365 : }
2366 49 : if (UsePtrType) {
2367 13 : VT = TLI.getPointerTy(DAG.getDataLayout());
2368 13 : Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2369 : }
2370 :
2371 49 : B.RegVT = VT.getSimpleVT();
2372 49 : B.Reg = FuncInfo.CreateReg(B.RegVT);
2373 49 : SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2374 :
2375 49 : MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2376 :
2377 49 : addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2378 49 : addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2379 : SwitchBB->normalizeSuccProbs();
2380 :
2381 49 : SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
2382 : MVT::Other, CopyTo, RangeCmp,
2383 49 : DAG.getBasicBlock(B.Default));
2384 :
2385 : // Avoid emitting unnecessary branches to the next block.
2386 49 : if (MBB != NextBlock(SwitchBB))
2387 6 : BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
2388 6 : DAG.getBasicBlock(MBB));
2389 :
2390 49 : DAG.setRoot(BrRange);
2391 49 : }
2392 :
2393 : /// visitBitTestCase - This function produces one "bit test".
2394 60 : void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
2395 : MachineBasicBlock* NextMBB,
2396 : BranchProbability BranchProbToNext,
2397 : unsigned Reg,
2398 : BitTestCase &B,
2399 : MachineBasicBlock *SwitchBB) {
2400 60 : SDLoc dl = getCurSDLoc();
2401 60 : MVT VT = BB.RegVT;
2402 120 : SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2403 60 : SDValue Cmp;
2404 60 : unsigned PopCount = countPopulation(B.Mask);
2405 60 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2406 60 : if (PopCount == 1) {
2407 : // Testing for a single bit; just compare the shift count with what it
2408 : // would need to be to shift a 1 bit in that position.
2409 3 : Cmp = DAG.getSetCC(
2410 6 : dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2411 : ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2412 3 : ISD::SETEQ);
2413 114 : } else if (PopCount == BB.Range) {
2414 : // There is only one zero bit in the range; test for it directly.
2415 4 : Cmp = DAG.getSetCC(
2416 8 : dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2417 : ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2418 4 : ISD::SETNE);
2419 : } else {
2420 : // Make desired shift
2421 : SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2422 53 : DAG.getConstant(1, dl, VT), ShiftOp);
2423 :
2424 : // Emit bit tests and jumps
2425 53 : SDValue AndOp = DAG.getNode(ISD::AND, dl,
2426 53 : VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2427 53 : Cmp = DAG.getSetCC(
2428 106 : dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2429 53 : AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
2430 : }
2431 :
2432 : // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2433 60 : addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2434 : // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2435 60 : addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2436 : // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
2437 : // one as they are relative probabilities (and thus work more like weights),
2438 : // and hence we need to normalize them to let the sum of them become one.
2439 : SwitchBB->normalizeSuccProbs();
2440 :
2441 60 : SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2442 : MVT::Other, getControlRoot(),
2443 60 : Cmp, DAG.getBasicBlock(B.TargetBB));
2444 :
2445 : // Avoid emitting unnecessary branches to the next block.
2446 60 : if (NextMBB != NextBlock(SwitchBB))
2447 27 : BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2448 27 : DAG.getBasicBlock(NextMBB));
2449 :
2450 60 : DAG.setRoot(BrAnd);
2451 60 : }
2452 :
2453 496991 : void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2454 496991 : MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2455 :
2456 : // Retrieve the successors, looking through artificial IR-level blocks
2457 : // (such as catchswitch) to find the real ones.
2458 496991 : MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2459 : const BasicBlock *EHPadBB = I.getSuccessor(1);
2460 :
2461 : // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2462 : // have to do anything here to lower funclet bundles.
2463 : assert(!I.hasOperandBundlesOtherThan(
2464 : {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2465 : "Cannot lower invokes with arbitrary operand bundles yet!");
2466 :
2467 : const Value *Callee(I.getCalledValue());
2468 : const Function *Fn = dyn_cast<Function>(Callee);
2469 496991 : if (isa<InlineAsm>(Callee))
2470 1 : visitInlineAsm(&I);
2471 496990 : else if (Fn && Fn->isIntrinsic()) {
2472 11 : switch (Fn->getIntrinsicID()) {
2473 0 : default:
2474 0 : llvm_unreachable("Cannot invoke this intrinsic");
2475 : case Intrinsic::donothing:
2476 : // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2477 : break;
2478 : case Intrinsic::experimental_patchpoint_void:
2479 : case Intrinsic::experimental_patchpoint_i64:
2480 2 : visitPatchpoint(&I, EHPadBB);
2481 2 : break;
2482 8 : case Intrinsic::experimental_gc_statepoint:
2483 8 : LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2484 8 : break;
2485 : }
2486 496979 : } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
2487 : // Currently we do not lower any intrinsic calls with deopt operand bundles.
2488 : // Eventually we will support lowering the @llvm.experimental.deoptimize
2489 : // intrinsic, and right now there are no plans to support other intrinsics
2490 : // with deopt state.
2491 0 : LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2492 : } else {
2493 496979 : LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2494 : }
2495 :
2496 : // If the value of the invoke is used outside of its defining block, make it
2497 : // available as a virtual register.
2498 : // We already took care of the exported value for the statepoint instruction
2499 : // during the call to LowerStatepoint.
2500 496991 : if (!isStatepoint(I)) {
2501 496983 : CopyToExportRegsIfNeeded(&I);
2502 : }
2503 :
2504 : SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2505 496991 : BranchProbabilityInfo *BPI = FuncInfo.BPI;
2506 : BranchProbability EHPadBBProb =
2507 43428 : BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2508 496991 : : BranchProbability::getZero();
2509 496991 : findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2510 :
2511 : // Update successor info.
2512 496991 : addSuccessorWithProb(InvokeMBB, Return);
2513 994022 : for (auto &UnwindDest : UnwindDests) {
2514 497031 : UnwindDest.first->setIsEHPad();
2515 497031 : addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2516 : }
2517 : InvokeMBB->normalizeSuccProbs();
2518 :
2519 : // Drop into normal successor.
2520 993982 : DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2521 : MVT::Other, getControlRoot(),
2522 1490973 : DAG.getBasicBlock(Return)));
2523 496991 : }
2524 :
2525 0 : void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2526 0 : llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2527 : }
2528 :
2529 338181 : void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2530 : assert(FuncInfo.MBB->isEHPad() &&
2531 : "Call to landingpad not in landing pad!");
2532 :
2533 : // If there aren't registers to copy the values into (e.g., during SjLj
2534 : // exceptions), then don't bother to create these DAG nodes.
2535 338181 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2536 338181 : const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
2537 338305 : if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2538 124 : TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2539 132 : return;
2540 :
2541 : // If the landingpad's return type is a token type, we don't create DAG
2542 : // nodes for its exception pointer and selector value. Extracting the
2543 : // exception pointer or selector value from a token-type landingpad is not
2544 : // currently supported.
2545 676114 : if (LP.getType()->isTokenTy())
2546 : return;
2547 :
2548 : SmallVector<EVT, 2> ValueVTs;
2549 338049 : SDLoc dl = getCurSDLoc();
2550 338049 : ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2551 : assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2552 :
2553 : // Get the two live-in registers as SDValues. The physregs have already been
2554 : // copied into virtual registers.
2555 338049 : SDValue Ops[2];
2556 338049 : if (FuncInfo.ExceptionPointerVirtReg) {
2557 338049 : Ops[0] = DAG.getZExtOrTrunc(
2558 338049 : DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2559 338049 : FuncInfo.ExceptionPointerVirtReg,
2560 : TLI.getPointerTy(DAG.getDataLayout())),
2561 1014147 : dl, ValueVTs[0]);
2562 : } else {
2563 0 : Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2564 : }
2565 338049 : Ops[1] = DAG.getZExtOrTrunc(
2566 338049 : DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2567 338049 : FuncInfo.ExceptionSelectorVirtReg,
2568 : TLI.getPointerTy(DAG.getDataLayout())),
2569 1014147 : dl, ValueVTs[1]);
2570 :
2571 : // Merge into one.
2572 338049 : SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2573 338049 : DAG.getVTList(ValueVTs), Ops);
2574 338049 : setValue(&LP, Res);
2575 : }
2576 :
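: /// Sorts single-case clusters and merges adjacent ones. For example (a
: /// sketch): clusters 4->A, 2->A, 3->B, 1->A sort to 1, 2, 3, 4 and merge
: /// into [1,2]->A, [3,3]->B, [4,4]->A; only adjacent clusters with the same
: /// destination and contiguous case values are combined.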
2577 11423 : void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2578 : #ifndef NDEBUG
2579 : for (const CaseCluster &CC : Clusters)
2580 : assert(CC.Low == CC.High && "Input clusters must be single-case");
2581 : #endif
2582 :
2583 : llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
2584 : return a.Low->getValue().slt(b.Low->getValue());
2585 : });
2586 :
2587 : // Merge adjacent clusters with the same destination.
2588 22846 : const unsigned N = Clusters.size();
2589 : unsigned DstIndex = 0;
2590 58189 : for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2591 46766 : CaseCluster &CC = Clusters[SrcIndex];
2592 46766 : const ConstantInt *CaseVal = CC.Low;
2593 46766 : MachineBasicBlock *Succ = CC.MBB;
2594 :
2595 46766 : if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2596 81846 : (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2597 : // If this case has the same successor and is a neighbour, merge it into
2598 : // the previous cluster.
2599 16208 : Clusters[DstIndex - 1].High = CaseVal;
2600 : Clusters[DstIndex - 1].Prob += CC.Prob;
2601 : } else {
2602 115986 : std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2603 : sizeof(Clusters[SrcIndex]));
2604 : }
2605 : }
2606 11423 : Clusters.resize(DstIndex);
2607 11423 : }
2608 :
2609 0 : void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2610 : MachineBasicBlock *Last) {
2611 : // Update JTCases.
2612 0 : for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
2613 0 : if (JTCases[i].first.HeaderBB == First)
2614 0 : JTCases[i].first.HeaderBB = Last;
2615 :
2616 : // Update BitTestCases.
2617 0 : for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2618 0 : if (BitTestCases[i].Parent == First)
2619 0 : BitTestCases[i].Parent = Last;
2620 0 : }
2621 :
2622 97 : void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2623 97 : MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2624 :
2625 : // Update machine-CFG edges with unique successors.
2626 : SmallSet<BasicBlock*, 32> Done;
2627 383 : for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
2628 : BasicBlock *BB = I.getSuccessor(i);
2629 286 : bool Inserted = Done.insert(BB).second;
2630 286 : if (!Inserted)
2631 : continue;
2632 :
2633 280 : MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2634 280 : addSuccessorWithProb(IndirectBrMBB, Succ);
2635 : }
2636 : IndirectBrMBB->normalizeSuccProbs();
2637 :
2638 194 : DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2639 : MVT::Other, getControlRoot(),
2640 291 : getValue(I.getAddress())));
2641 97 : }
2642 :
2643 28225 : void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2644 28225 : if (!DAG.getTarget().Options.TrapUnreachable)
2645 : return;
2646 :
2647 :   // We may be able to ignore an unreachable behind a noreturn call.
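     :   // e.g. (hypothetical IR)  call void @abort() noreturn ; unreachable
     :   // needs no trap, since control never falls through the noreturn call.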
2648 674 : if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
2649 557 : const BasicBlock &BB = *I.getParent();
2650 1114 : if (&I != &BB.front()) {
2651 : BasicBlock::const_iterator PredI =
2652 : std::prev(BasicBlock::const_iterator(&I));
2653 : if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
2654 272 : if (Call->doesNotReturn())
2655 : return;
2656 : }
2657 : }
2658 : }
2659 :
2660 2304 : DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2661 : }
2662 :
2663 8812 : void SelectionDAGBuilder::visitFSub(const User &I) {
2664 : // -0.0 - X --> fneg
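     :   // e.g. (hypothetical IR)  %y = fsub float -0.000000e+00, %x
     :   // lowers to a single ISD::FNEG of %x.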
2665 8812 : Type *Ty = I.getType();
2666 14017 : if (isa<Constant>(I.getOperand(0)) &&
2667 5205 : I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
2668 4662 : SDValue Op2 = getValue(I.getOperand(1));
2669 18650 : setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2670 : Op2.getValueType(), Op2));
2671 : return;
2672 : }
2673 :
2674 4150 : visitBinary(I, ISD::FSUB);
2675 : }
2676 :
2677 : /// Checks if the given instruction performs a vector reduction, in which case
2678 : /// we are free to alter the individual elements of the result, as long as
2679 : /// their reduction stays unchanged.
2680 487496 : static bool isVectorReductionOp(const User *I) {
2681 : const Instruction *Inst = dyn_cast<Instruction>(I);
2682 974316 : if (!Inst || !Inst->getType()->isVectorTy())
2683 : return false;
2684 :
2685 : auto OpCode = Inst->getOpcode();
2686 : switch (OpCode) {
2687 : case Instruction::Add:
2688 : case Instruction::Mul:
2689 : case Instruction::And:
2690 : case Instruction::Or:
2691 : case Instruction::Xor:
2692 : break;
2693 : case Instruction::FAdd:
2694 : case Instruction::FMul:
2695 : if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2696 8947 : if (FPOp->getFastMathFlags().isFast())
2697 : break;
2698 : LLVM_FALLTHROUGH;
2699 : default:
2700 : return false;
2701 : }
2702 :
2703 : unsigned ElemNum = Inst->getType()->getVectorNumElements();
2704 : // Ensure the reduction size is a power of 2.
2705 : if (!isPowerOf2_32(ElemNum))
2706 : return false;
2707 :
2708 : unsigned ElemNumToReduce = ElemNum;
2709 :
2710 :   // Do a DFS over the def-use chain from the given instruction. We only
2711 : // allow four kinds of operations during the search until we reach the
2712 : // instruction that extracts the first element from the vector:
2713 : //
2714 : // 1. The reduction operation of the same opcode as the given instruction.
2715 : //
2716 : // 2. PHI node.
2717 : //
2718 : // 3. ShuffleVector instruction together with a reduction operation that
2719 : // does a partial reduction.
2720 : //
2721 : // 4. ExtractElement that extracts the first element from the vector, and we
2722 : // stop searching the def-use chain here.
2723 : //
2724 : // 3 & 4 above perform a reduction on all elements of the vector. We push defs
2725 : // from 1-3 to the stack to continue the DFS. The given instruction is not
2726 :   // a reduction operation if we meet any instruction other than those
2727 : // listed above.
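     :   //
     :   // A minimal sketch of the shape this walks (hypothetical IR for a
     :   // 4-wide integer add reduction; all names are illustrative):
     :   //   %r0  = add <4 x i32> %a, %b
     :   //   %s1  = shufflevector <4 x i32> %r0, <4 x i32> undef,
     :   //              <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
     :   //   %r1  = add <4 x i32> %r0, %s1
     :   //   %s2  = shufflevector <4 x i32> %r1, <4 x i32> undef,
     :   //              <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
     :   //   %r2  = add <4 x i32> %r1, %s2
     :   //   %red = extractelement <4 x i32> %r2, i32 0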
2728 :
2729 119738 : SmallVector<const User *, 16> UsersToVisit{Inst};
2730 : SmallPtrSet<const User *, 16> Visited;
2731 : bool ReduxExtracted = false;
2732 :
2733 123238 : while (!UsersToVisit.empty()) {
2734 123002 : auto User = UsersToVisit.back();
2735 : UsersToVisit.pop_back();
2736 123002 : if (!Visited.insert(User).second)
2737 : continue;
2738 :
2739 128608 : for (const auto &U : User->users()) {
2740 : auto Inst = dyn_cast<Instruction>(U);
2741 : if (!Inst)
2742 : return false;
2743 :
2744 125656 : if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
2745 : if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2746 439 : if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
2747 : return false;
2748 5589 : UsersToVisit.push_back(U);
2749 : } else if (const ShuffleVectorInst *ShufInst =
2750 : dyn_cast<ShuffleVectorInst>(U)) {
2751 :         // Detect the following pattern: a ShuffleVector instruction together
2752 :         // with a reduction that does a partial reduction on the first and
2753 :         // second ElemNumToReduce / 2 elements, storing the result in the
2754 :         // first ElemNumToReduce / 2 elements of another vector.
2755 :
2756 : unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
2757 4237 : if (ResultElements < ElemNum)
2758 : return false;
2759 :
2760 4124 : if (ElemNumToReduce == 1)
2761 : return false;
2762 4124 : if (!isa<UndefValue>(U->getOperand(1)))
2763 : return false;
2764 4965 : for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
2765 4531 : if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
2766 : return false;
2767 7157 : for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
2768 6733 : if (ShufInst->getMaskValue(i) != -1)
2769 : return false;
2770 :
2771 : // There is only one user of this ShuffleVector instruction, which
2772 : // must be a reduction operation.
2773 424 : if (!U->hasOneUse())
2774 : return false;
2775 :
2776 : auto U2 = dyn_cast<Instruction>(*U->user_begin());
2777 424 : if (!U2 || U2->getOpcode() != OpCode)
2778 : return false;
2779 :
2780 : // Check operands of the reduction operation.
2781 844 : if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
2782 16 : (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
2783 422 : UsersToVisit.push_back(U2);
2784 : ElemNumToReduce /= 2;
2785 : } else
2786 : return false;
2787 : } else if (isa<ExtractElementInst>(U)) {
2788 : // At this moment we should have reduced all elements in the vector.
2789 1691 : if (ElemNumToReduce != 1)
2790 : return false;
2791 :
2792 : const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
2793 143 : if (!Val || !Val->isZero())
2794 : return false;
2795 :
2796 : ReduxExtracted = true;
2797 : } else
2798 : return false;
2799 : }
2800 : }
2801 : return ReduxExtracted;
2802 : }
2803 :
2804 487496 : void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
2805 : SDNodeFlags Flags;
2806 : if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
2807 : Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
2808 : Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
2809 : }
2810 : if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
2811 : Flags.setExact(ExactOp->isExact());
2812 : }
2813 487496 : if (isVectorReductionOp(&I)) {
2814 : Flags.setVectorReduction(true);
2815 : LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
2816 : }
2817 :
2818 487496 : SDValue Op1 = getValue(I.getOperand(0));
2819 487496 : SDValue Op2 = getValue(I.getOperand(1));
2820 974992 : SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
2821 974987 : Op1, Op2, Flags);
2822 487496 : setValue(&I, BinNodeValue);
2823 487496 : }
2824 :
2825 26169 : void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2826 26169 : SDValue Op1 = getValue(I.getOperand(0));
2827 26169 : SDValue Op2 = getValue(I.getOperand(1));
2828 :
2829 26169 : EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2830 26169 : Op1.getValueType(), DAG.getDataLayout());
2831 :
2832 : // Coerce the shift amount to the right type if we can.
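     :   // e.g. (hypothetical target with an i8 shift-amount type): an i64
     :   // amount is truncated to i8, since 8 bits can encode any legal shift
     :   // count, while an i1 amount is zero-extended up to i8.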
2833 52338 : if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2834 15220 : unsigned ShiftSize = ShiftTy.getSizeInBits();
2835 15220 : unsigned Op2Size = Op2.getValueSizeInBits();
2836 15220 : SDLoc DL = getCurSDLoc();
2837 :
2838 : // If the operand is smaller than the shift count type, promote it.
2839 15220 : if (ShiftSize > Op2Size)
2840 1464 : Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2841 :
2842 : // If the operand is larger than the shift count type but the shift
2843 : // count type has enough bits to represent any shift value, truncate
2844 : // it now. This is a common case and it exposes the truncate to
2845 : // optimization early.
2846 28976 : else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
2847 28958 : Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2848 : // Otherwise we'll need to temporarily settle for some other convenient
2849 : // type. Type legalization will make adjustments once the shiftee is split.
2850 : else
2851 18 : Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2852 : }
2853 :
2854 : bool nuw = false;
2855 : bool nsw = false;
2856 : bool exact = false;
2857 :
2858 26169 : if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
2859 :
2860 : if (const OverflowingBinaryOperator *OFBinOp =
2861 : dyn_cast<const OverflowingBinaryOperator>(&I)) {
2862 : nuw = OFBinOp->hasNoUnsignedWrap();
2863 : nsw = OFBinOp->hasNoSignedWrap();
2864 : }
2865 : if (const PossiblyExactOperator *ExactOp =
2866 : dyn_cast<const PossiblyExactOperator>(&I))
2867 : exact = ExactOp->isExact();
2868 : }
2869 : SDNodeFlags Flags;
2870 : Flags.setExact(exact);
2871 : Flags.setNoSignedWrap(nsw);
2872 : Flags.setNoUnsignedWrap(nuw);
2873 52338 : SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2874 26169 : Flags);
2875 26169 : setValue(&I, Res);
2876 26169 : }
2877 :
2878 5070 : void SelectionDAGBuilder::visitSDiv(const User &I) {
2879 5070 : SDValue Op1 = getValue(I.getOperand(0));
2880 5070 : SDValue Op2 = getValue(I.getOperand(1));
2881 :
2882 : SDNodeFlags Flags;
2883 5070 : Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2884 : cast<PossiblyExactOperator>(&I)->isExact());
2885 20279 : setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2886 : Op2, Flags));
2887 5070 : }
2888 :
2889 98063 : void SelectionDAGBuilder::visitICmp(const User &I) {
2890 : ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2891 : if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2892 : predicate = IC->getPredicate();
2893 : else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2894 80 : predicate = ICmpInst::Predicate(IC->getPredicate());
2895 98063 : SDValue Op1 = getValue(I.getOperand(0));
2896 98063 : SDValue Op2 = getValue(I.getOperand(1));
2897 98063 : ISD::CondCode Opcode = getICmpCondCode(predicate);
2898 :
2899 98063 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2900 98063 : I.getType());
2901 196123 : setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2902 98063 : }
2903 :
2904 9592 : void SelectionDAGBuilder::visitFCmp(const User &I) {
2905 : FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2906 : if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2907 : predicate = FC->getPredicate();
2908 : else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2909 2 : predicate = FCmpInst::Predicate(FC->getPredicate());
2910 9592 : SDValue Op1 = getValue(I.getOperand(0));
2911 9592 : SDValue Op2 = getValue(I.getOperand(1));
2912 :
2913 9592 : ISD::CondCode Condition = getFCmpCondCode(predicate);
2914 : auto *FPMO = dyn_cast<FPMathOperator>(&I);
2915 9592 : if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
2916 1082 : Condition = getFCmpCodeWithoutNaN(Condition);
2917 :
2918 9592 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2919 9592 : I.getType());
2920 28776 : setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2921 9592 : }
2922 :
2923 : // Check that every user of the select's condition is itself a select; only
2924 : // then does folding the compare into a min/max leave no other consumers.
2925 : static bool hasOnlySelectUsers(const Value *Cond) {
2926 : return llvm::all_of(Cond->users(), [](const Value *V) {
2927 : return isa<SelectInst>(V);
2928 : });
2929 : }
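     :
     : // A sketch of the fold this check guards (hypothetical IR):
     : //   %c = icmp slt i32 %a, %b
     : //   %m = select i1 %c, i32 %a, i32 %b   ; may become ISD::SMIN below
     : // If %c had any non-select user, the compare would survive the fold, so
     : // rewriting the select would not pay off.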
2930 :
2931 34284 : void SelectionDAGBuilder::visitSelect(const User &I) {
2932 : SmallVector<EVT, 4> ValueVTs;
2933 34284 : ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
2934 : ValueVTs);
2935 34284 : unsigned NumValues = ValueVTs.size();
2936 34284 : if (NumValues == 0) return;
2937 :
2938 34277 : SmallVector<SDValue, 4> Values(NumValues);
2939 34277 : SDValue Cond = getValue(I.getOperand(0));
2940 34277 : SDValue LHSVal = getValue(I.getOperand(1));
2941 34277 : SDValue RHSVal = getValue(I.getOperand(2));
2942 34277 : auto BaseOps = {Cond};
2943 102831 : ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2944 : ISD::VSELECT : ISD::SELECT;
2945 :
2946 : // Min/max matching is only viable if all output VTs are the same.
2947 34277 : if (is_splat(ValueVTs)) {
2948 34273 : EVT VT = ValueVTs[0];
2949 34273 : LLVMContext &Ctx = *DAG.getContext();
2950 34273 : auto &TLI = DAG.getTargetLoweringInfo();
2951 :
2952 : // We care about the legality of the operation after it has been type
2953 : // legalized.
2954 41312 : while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2955 7041 : VT != TLI.getTypeToTransformTo(Ctx, VT))
2956 7018 : VT = TLI.getTypeToTransformTo(Ctx, VT);
2957 :
2958 : // If the vselect is legal, assume we want to leave this as a vector setcc +
2959 : // vselect. Otherwise, if this is going to be scalarized, we want to see if
2960 : // min/max is legal on the scalar type.
2961 34273 : bool UseScalarMinMax = VT.isVector() &&
2962 : !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
2963 :
2964 : Value *LHS, *RHS;
2965 34273 : auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
2966 : ISD::NodeType Opc = ISD::DELETED_NODE;
2967 34273 : switch (SPR.Flavor) {
2968 : case SPF_UMAX: Opc = ISD::UMAX; break;
2969 : case SPF_UMIN: Opc = ISD::UMIN; break;
2970 : case SPF_SMAX: Opc = ISD::SMAX; break;
2971 : case SPF_SMIN: Opc = ISD::SMIN; break;
2972 504 : case SPF_FMINNUM:
2973 : switch (SPR.NaNBehavior) {
2974 : case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2975 : case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
2976 : case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2977 351 : case SPNB_RETURNS_ANY: {
2978 : if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
2979 : Opc = ISD::FMINNUM;
2980 : else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
2981 : Opc = ISD::FMINNAN;
2982 249 : else if (UseScalarMinMax)
2983 2 : Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
2984 : ISD::FMINNUM : ISD::FMINNAN;
2985 : break;
2986 : }
2987 : }
2988 : break;
2989 545 : case SPF_FMAXNUM:
2990 : switch (SPR.NaNBehavior) {
2991 : case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2992 : case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
2993 : case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
2994 372 : case SPNB_RETURNS_ANY:
2995 :
2996 : if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
2997 : Opc = ISD::FMAXNUM;
2998 : else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
2999 : Opc = ISD::FMAXNAN;
3000 248 : else if (UseScalarMinMax)
3001 2 : Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
3002 : ISD::FMAXNUM : ISD::FMAXNAN;
3003 : break;
3004 : }
3005 : break;
3006 : default: break;
3007 : }
3008 :
3009 11041 : if (Opc != ISD::DELETED_NODE &&
3010 12406 : (TLI.isOperationLegalOrCustom(Opc, VT) ||
3011 107 : (UseScalarMinMax &&
3012 9885 : TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
3013 : // If the underlying comparison instruction is used by any other
3014 : // instruction, the consumed instructions won't be destroyed, so it is
3015 : // not profitable to convert to a min/max.
3016 : hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
3017 : OpCode = Opc;
3018 9755 : LHSVal = getValue(LHS);
3019 9755 : RHSVal = getValue(RHS);
3020 : BaseOps = {};
3021 : }
3022 : }
3023 :
3024 68559 : for (unsigned i = 0; i != NumValues; ++i) {
3025 : SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
3026 68564 : Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
3027 68564 : Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
3028 137128 : Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
3029 : LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
3030 102846 : Ops);
3031 : }
3032 :
3033 104679 : setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3034 : DAG.getVTList(ValueVTs), Values));
3035 : }
3036 :
3037 37944 : void SelectionDAGBuilder::visitTrunc(const User &I) {
3038 : // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
3039 37944 : SDValue N = getValue(I.getOperand(0));
3040 37944 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3041 37944 : I.getType());
3042 113827 : setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
3043 37944 : }
3044 :
3045 39290 : void SelectionDAGBuilder::visitZExt(const User &I) {
3046 : // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3047 :   // ZExt also can't be a cast to bool for the same reason, so there is
     :   // little to do.
3048 39290 : SDValue N = getValue(I.getOperand(0));
3049 39290 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3050 39290 : I.getType());
3051 117865 : setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
3052 39290 : }
3053 :
3054 17903 : void SelectionDAGBuilder::visitSExt(const User &I) {
3055 : // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3056 :   // SExt also can't be a cast to bool for the same reason, so there is
     :   // little to do.
3057 17903 : SDValue N = getValue(I.getOperand(0));
3058 17903 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3059 17903 : I.getType());
3060 53709 : setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
3061 17903 : }
3062 :
3063 903 : void SelectionDAGBuilder::visitFPTrunc(const User &I) {
3064 : // FPTrunc is never a no-op cast, no need to check
3065 903 : SDValue N = getValue(I.getOperand(0));
3066 903 : SDLoc dl = getCurSDLoc();
3067 903 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3068 903 : EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3069 2709 : setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
3070 : DAG.getTargetConstant(
3071 : 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
3072 903 : }
3073 :
3074 2428 : void SelectionDAGBuilder::visitFPExt(const User &I) {
3075 : // FPExt is never a no-op cast, no need to check
3076 2428 : SDValue N = getValue(I.getOperand(0));
3077 2428 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3078 2428 : I.getType());
3079 7284 : setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
3080 2428 : }
3081 :
3082 3559 : void SelectionDAGBuilder::visitFPToUI(const User &I) {
3083 : // FPToUI is never a no-op cast, no need to check
3084 3559 : SDValue N = getValue(I.getOperand(0));
3085 3559 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3086 3559 : I.getType());
3087 10677 : setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3088 3559 : }
3089 :
3090 1976 : void SelectionDAGBuilder::visitFPToSI(const User &I) {
3091 : // FPToSI is never a no-op cast, no need to check
3092 1976 : SDValue N = getValue(I.getOperand(0));
3093 1976 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3094 1976 : I.getType());
3095 5928 : setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3096 1976 : }
3097 :
3098 7775 : void SelectionDAGBuilder::visitUIToFP(const User &I) {
3099 : // UIToFP is never a no-op cast, no need to check
3100 7775 : SDValue N = getValue(I.getOperand(0));
3101 7775 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3102 7775 : I.getType());
3103 23325 : setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3104 7775 : }
3105 :
3106 3345 : void SelectionDAGBuilder::visitSIToFP(const User &I) {
3107 : // SIToFP is never a no-op cast, no need to check
3108 3345 : SDValue N = getValue(I.getOperand(0));
3109 3345 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3110 3345 : I.getType());
3111 10035 : setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3112 3345 : }
3113 :
3114 14971 : void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3115 : // What to do depends on the size of the integer and the size of the pointer.
3116 : // We can either truncate, zero extend, or no-op, accordingly.
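     :   // e.g. (hypothetical 64-bit target) ptrtoint i8* %p to i32 truncates,
     :   // to i128 zero-extends, and to i64 is a no-op.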
3117 14971 : SDValue N = getValue(I.getOperand(0));
3118 14971 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3119 14971 : I.getType());
3120 29862 : setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3121 14971 : }
3122 :
3123 21323 : void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3124 : // What to do depends on the size of the integer and the size of the pointer.
3125 : // We can either truncate, zero extend, or no-op, accordingly.
3126 21323 : SDValue N = getValue(I.getOperand(0));
3127 21323 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3128 21323 : I.getType());
3129 42640 : setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3130 21323 : }
3131 :
3132 1478103 : void SelectionDAGBuilder::visitBitCast(const User &I) {
3133 1478103 : SDValue N = getValue(I.getOperand(0));
3134 1478103 : SDLoc dl = getCurSDLoc();
3135 1478103 : EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3136 1478103 : I.getType());
3137 :
3138 : // BitCast assures us that source and destination are the same size so this is
3139 : // either a BITCAST or a no-op.
3140 2956345 : if (DestVT != N.getValueType())
3141 70910 : setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3142 : DestVT, N)); // convert types.
3143 :   // Check if the original LLVM IR operand was a ConstantInt, because getValue()
3144 : // might fold any kind of constant expression to an integer constant and that
3145 : // is not what we are looking for. Only recognize a bitcast of a genuine
3146 : // constant integer as an opaque constant.
3147 : else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3148 1084 : setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3149 : /*isOpaque*/true));
3150 : else
3151 1442106 : setValue(&I, N); // noop cast.
3152 1478103 : }
3153 :
3154 306 : void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3155 306 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3156 : const Value *SV = I.getOperand(0);
3157 306 : SDValue N = getValue(SV);
3158 306 : EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3159 :
3160 306 : unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3161 306 : unsigned DestAS = I.getType()->getPointerAddressSpace();
3162 :
3163 306 : if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3164 428 : N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3165 :
3166 306 : setValue(&I, N);
3167 306 : }
3168 :
3169 29821 : void SelectionDAGBuilder::visitInsertElement(const User &I) {
3170 29821 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3171 29821 : SDValue InVec = getValue(I.getOperand(0));
3172 29821 : SDValue InVal = getValue(I.getOperand(1));
3173 29821 : SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3174 59642 : TLI.getVectorIdxTy(DAG.getDataLayout()));
3175 89463 : setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3176 : TLI.getValueType(DAG.getDataLayout(), I.getType()),
3177 : InVec, InVal, InIdx));
3178 29821 : }
3179 :
3180 49246 : void SelectionDAGBuilder::visitExtractElement(const User &I) {
3181 49246 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3182 49246 : SDValue InVec = getValue(I.getOperand(0));
3183 49246 : SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3184 98492 : TLI.getVectorIdxTy(DAG.getDataLayout()));
3185 147738 : setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3186 : TLI.getValueType(DAG.getDataLayout(), I.getType()),
3187 : InVec, InIdx));
3188 49246 : }
3189 :
3190 46828 : void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3191 46828 : SDValue Src1 = getValue(I.getOperand(0));
3192 46828 : SDValue Src2 = getValue(I.getOperand(1));
3193 46828 : SDLoc DL = getCurSDLoc();
3194 :
3195 : SmallVector<int, 8> Mask;
3196 46828 : ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3197 46828 : unsigned MaskNumElts = Mask.size();
3198 :
3199 46828 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3200 46828 : EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3201 93656 : EVT SrcVT = Src1.getValueType();
3202 : unsigned SrcNumElts = SrcVT.getVectorNumElements();
3203 :
3204 46828 : if (SrcNumElts == MaskNumElts) {
3205 65716 : setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3206 32858 : return;
3207 : }
3208 :
3209 :   // Normalize the shuffle vector, since the mask and vector lengths don't match.
3210 13970 : if (SrcNumElts < MaskNumElts) {
3211 :     // The mask is longer than the source vectors. Concatenate the source
3212 :     // vectors to make their length match the mask's.
3213 :
3214 5879 : if (MaskNumElts % SrcNumElts == 0) {
3215 : // Mask length is a multiple of the source vector length.
3216 : // Check if the shuffle is some kind of concatenation of the input
3217 : // vectors.
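     :       // e.g. (hypothetical) shufflevector <2 x i32> %a, <2 x i32> %b,
     :       //   <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     :       // is just concat_vectors(%a, %b).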
3218 5747 : unsigned NumConcat = MaskNumElts / SrcNumElts;
3219 : bool IsConcat = true;
3220 5747 : SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3221 101488 : for (unsigned i = 0; i != MaskNumElts; ++i) {
3222 97079 : int Idx = Mask[i];
3223 97079 : if (Idx < 0)
3224 : continue;
3225 : // Ensure the indices in each SrcVT sized piece are sequential and that
3226 : // the same source is used for the whole piece.
3227 94682 : if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3228 186696 : (ConcatSrcs[i / SrcNumElts] >= 0 &&
3229 75482 : ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3230 : IsConcat = false;
3231 : break;
3232 : }
3233 : // Remember which source this index came from.
3234 93344 : ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3235 : }
3236 :
3237 : // The shuffle is concatenating multiple vectors together. Just emit
3238 : // a CONCAT_VECTORS operation.
3239 5747 : if (IsConcat) {
3240 : SmallVector<SDValue, 8> ConcatOps;
3241 21619 : for (auto Src : ConcatSrcs) {
3242 17210 : if (Src < 0)
3243 545 : ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3244 16665 : else if (Src == 0)
3245 6153 : ConcatOps.push_back(Src1);
3246 : else
3247 10512 : ConcatOps.push_back(Src2);
3248 : }
3249 8818 : setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3250 : return;
3251 : }
3252 : }
3253 :
3254 1470 : unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3255 1470 : unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3256 1470 : EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3257 1470 : PaddedMaskNumElts);
3258 :
3259 : // Pad both vectors with undefs to make them the same length as the mask.
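     :     // (Hypothetical sizes: a 6-element mask over 4-element sources pads
     :     // to alignTo(6, 4) == 8 lanes, shuffles at 8 wide, and the code at
     :     // the end extracts the low 6 lanes again.)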
3260 1470 : SDValue UndefVal = DAG.getUNDEF(SrcVT);
3261 :
3262 1470 : SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3263 : SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3264 1470 : MOps1[0] = Src1;
3265 1470 : MOps2[0] = Src2;
3266 :
3267 1470 : Src1 = Src1.isUndef()
3268 1470 : ? DAG.getUNDEF(PaddedVT)
3269 2928 : : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3270 1470 : Src2 = Src2.isUndef()
3271 1470 : ? DAG.getUNDEF(PaddedVT)
3272 810 : : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3273 :
3274 : // Readjust mask for new input vector length.
3275 1470 : SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3276 25237 : for (unsigned i = 0; i != MaskNumElts; ++i) {
3277 23767 : int Idx = Mask[i];
3278 23767 : if (Idx >= (int)SrcNumElts)
3279 4755 : Idx -= SrcNumElts - PaddedMaskNumElts;
3280 23767 : MappedOps[i] = Idx;
3281 : }
3282 :
3283 2940 : SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3284 :
3285 : // If the concatenated vector was padded, extract a subvector with the
3286 : // correct number of elements.
3287 1470 : if (MaskNumElts != PaddedMaskNumElts)
3288 132 : Result = DAG.getNode(
3289 : ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3290 132 : DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
3291 :
3292 1470 : setValue(&I, Result);
3293 : return;
3294 : }
3295 :
3296 8091 : if (SrcNumElts > MaskNumElts) {
3297 : // Analyze the access pattern of the vector to see if we can extract
3298 : // two subvectors and do the shuffle.
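     :     // e.g. (hypothetical) a <4 x i32> mask reading only lanes 4..7 of
     :     // <8 x i32> sources becomes an EXTRACT_SUBVECTOR at index 4 feeding
     :     // a 4-wide shuffle.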
3299 8091 : int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
3300 : bool CanExtract = true;
3301 48238 : for (int Idx : Mask) {
3302 : unsigned Input = 0;
3303 40147 : if (Idx < 0)
3304 : continue;
3305 :
3306 40130 : if (Idx >= (int)SrcNumElts) {
3307 : Input = 1;
3308 1880 : Idx -= SrcNumElts;
3309 : }
3310 :
3311 :       // If all the indices come from the same MaskNumElts-sized portion of
3312 :       // the sources, we can use extract. Also make sure the extract wouldn't
3313 : // extract past the end of the source.
3314 40130 : int NewStartIdx = alignDown(Idx, MaskNumElts);
3315 40130 : if (NewStartIdx + MaskNumElts > SrcNumElts ||
3316 40117 : (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3317 : CanExtract = false;
3318 : // Make sure we always update StartIdx as we use it to track if all
3319 : // elements are undef.
3320 40130 : StartIdx[Input] = NewStartIdx;
3321 : }
3322 :
3323 8091 : if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3324 0 : setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3325 6489 : return;
3326 : }
3327 8091 : if (CanExtract) {
3328 : // Extract appropriate subvector and generate a vector shuffle
3329 19467 : for (unsigned Input = 0; Input < 2; ++Input) {
3330 12978 : SDValue &Src = Input == 0 ? Src1 : Src2;
3331 12978 : if (StartIdx[Input] < 0)
3332 6437 : Src = DAG.getUNDEF(VT);
3333 : else {
3334 6541 : Src = DAG.getNode(
3335 : ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3336 : DAG.getConstant(StartIdx[Input], DL,
3337 6541 : TLI.getVectorIdxTy(DAG.getDataLayout())));
3338 : }
3339 : }
3340 :
3341 : // Calculate new mask.
3342 : SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3343 34014 : for (int &Idx : MappedOps) {
3344 27525 : if (Idx >= (int)SrcNumElts)
3345 1831 : Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3346 25694 : else if (Idx >= 0)
3347 25683 : Idx -= StartIdx[0];
3348 : }
3349 :
3350 12978 : setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3351 : return;
3352 : }
3353 : }
3354 :
3355 :   // We can't use either concat vectors or extract subvectors, so fall back
3356 :   // to replacing the shuffle with per-element extracts feeding a build
3357 :   // vector.
3358 1602 : EVT EltVT = VT.getVectorElementType();
3359 1602 : EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
3360 : SmallVector<SDValue,8> Ops;
3361 14224 : for (int Idx : Mask) {
3362 12622 : SDValue Res;
3363 :
3364 12622 : if (Idx < 0) {
3365 6 : Res = DAG.getUNDEF(EltVT);
3366 : } else {
3367 12616 : SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3368 12616 : if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3369 :
3370 12616 : Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3371 12616 : EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
3372 : }
3373 :
3374 12622 : Ops.push_back(Res);
3375 : }
3376 :
3377 3204 : setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3378 : }
3379 :
3380 17981 : void SelectionDAGBuilder::visitInsertValue(const User &I) {
3381 : ArrayRef<unsigned> Indices;
3382 : if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3383 : Indices = IV->getIndices();
3384 : else
3385 0 : Indices = cast<ConstantExpr>(&I)->getIndices();
3386 :
3387 : const Value *Op0 = I.getOperand(0);
3388 : const Value *Op1 = I.getOperand(1);
3389 17981 : Type *AggTy = I.getType();
3390 17981 : Type *ValTy = Op1->getType();
3391 : bool IntoUndef = isa<UndefValue>(Op0);
3392 : bool FromUndef = isa<UndefValue>(Op1);
3393 :
3394 : unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3395 :
3396 17981 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3397 : SmallVector<EVT, 4> AggValueVTs;
3398 17981 : ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3399 : SmallVector<EVT, 4> ValValueVTs;
3400 17981 : ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3401 :
3402 17981 : unsigned NumAggValues = AggValueVTs.size();
3403 17981 : unsigned NumValValues = ValValueVTs.size();
3404 17981 : SmallVector<SDValue, 4> Values(NumAggValues);
3405 :
3406 : // Ignore an insertvalue that produces an empty object
3407 17981 : if (!NumAggValues) {
3408 2 : setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3409 : return;
3410 : }
3411 :
3412 17980 : SDValue Agg = getValue(Op0);
3413 : unsigned i = 0;
3414 : // Copy the beginning value(s) from the original aggregate.
3415 28039 : for (; i != LinearIndex; ++i)
3416 10059 : Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3417 : SDValue(Agg.getNode(), Agg.getResNo() + i);
3418 : // Copy values from the inserted value(s).
3419 17980 : if (NumValValues) {
3420 17978 : SDValue Val = getValue(Op1);
3421 35982 : for (; i != LinearIndex + NumValValues; ++i)
3422 53978 : Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3423 17987 : SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3424 : }
3425 : // Copy remaining value(s) from the original aggregate.
3426 28086 : for (; i != NumAggValues; ++i)
3427 10106 : Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3428 : SDValue(Agg.getNode(), Agg.getResNo() + i);
3429 :
3430 54132 : setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3431 : DAG.getVTList(AggValueVTs), Values));
3432 : }
3433 :
3434 121053 : void SelectionDAGBuilder::visitExtractValue(const User &I) {
3435 : ArrayRef<unsigned> Indices;
3436 : if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3437 : Indices = EV->getIndices();
3438 : else
3439 1 : Indices = cast<ConstantExpr>(&I)->getIndices();
3440 :
3441 : const Value *Op0 = I.getOperand(0);
3442 121053 : Type *AggTy = Op0->getType();
3443 121053 : Type *ValTy = I.getType();
3444 : bool OutOfUndef = isa<UndefValue>(Op0);
3445 :
3446 : unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3447 :
3448 121053 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3449 : SmallVector<EVT, 4> ValValueVTs;
3450 121053 : ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3451 :
3452 121053 : unsigned NumValValues = ValValueVTs.size();
3453 :
3454 :   // Ignore an extractvalue that produces an empty object
3455 121053 : if (!NumValValues) {
3456 8 : setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3457 : return;
3458 : }
3459 :
3460 121049 : SmallVector<SDValue, 4> Values(NumValValues);
3461 :
3462 121049 : SDValue Agg = getValue(Op0);
3463 : // Copy out the selected value(s).
3464 242113 : for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3465 121064 : Values[i - LinearIndex] =
3466 121079 : OutOfUndef ?
3467 30 : DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3468 : SDValue(Agg.getNode(), Agg.getResNo() + i);
3469 :
3470 413616 : setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3471 : DAG.getVTList(ValValueVTs), Values));
3472 : }
3473 :
3474 1614131 : void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3475 : Value *Op0 = I.getOperand(0);
3476 : // Note that the pointer operand may be a vector of pointers. Take the scalar
3477 : // element which holds a pointer.
3478 1614131 : unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3479 1614131 : SDValue N = getValue(Op0);
3480 1614131 : SDLoc dl = getCurSDLoc();
3481 :
3482 :   // Normalize a vector GEP: all scalar operands should be converted to
3483 :   // splat vectors.
3484 3228262 : unsigned VectorWidth = I.getType()->isVectorTy() ?
3485 : cast<VectorType>(I.getType())->getVectorNumElements() : 0;
3486 :
3487 1614397 : if (VectorWidth && !N.getValueType().isVector()) {
3488 156 : LLVMContext &Context = *DAG.getContext();
3489 156 : EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
3490 156 : N = DAG.getSplatBuildVector(VT, dl, N);
3491 : }
3492 :
3493 4709958 : for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3494 7805785 : GTI != E; ++GTI) {
3495 : const Value *Idx = GTI.getOperand();
3496 916353 : if (StructType *StTy = GTI.getStructTypeOrNull()) {
3497 916353 : unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3498 916353 : if (Field) {
3499 : // N = N + Offset
3500 183669 : uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3501 :
3502 : // In an inbounds GEP with an offset that is nonnegative even when
3503 : // interpreted as signed, assume there is no unsigned overflow.
3504 : SDNodeFlags Flags;
3505 183669 : if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
3506 : Flags.setNoUnsignedWrap(true);
3507 :
3508 367338 : N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3509 183669 : DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3510 : }
3511 : } else {
3512 2179474 : unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
3513 2179474 : MVT IdxTy = MVT::getIntegerVT(IdxSize);
3514 2179474 : APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
3515 :
3516 : // If this is a scalar constant or a splat vector of constants,
3517 : // handle it quickly.
3518 : const auto *CI = dyn_cast<ConstantInt>(Idx);
3519 39843 : if (!CI && isa<ConstantDataVector>(Idx) &&
3520 20 : cast<ConstantDataVector>(Idx)->getSplatValue())
3521 14 : CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3522 :
3523 2179474 : if (CI) {
3524 2139665 : if (CI->isZero())
3525 : continue;
3526 413953 : APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
3527 413953 : LLVMContext &Context = *DAG.getContext();
3528 : SDValue OffsVal = VectorWidth ?
3529 21 : DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
3530 413974 : DAG.getConstant(Offs, dl, IdxTy);
3531 :
3532 :         // In an inbounds GEP with an offset that is nonnegative even when
3533 : // interpreted as signed, assume there is no unsigned overflow.
3534 : SDNodeFlags Flags;
3535 413953 : if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
3536 : Flags.setNoUnsignedWrap(true);
3537 :
3538 827906 : N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3539 : continue;
3540 : }
3541 :
3542 : // N = N + Idx * ElementSize;
3543 39809 : SDValue IdxN = getValue(Idx);
3544 :
3545 79618 : if (!IdxN.getValueType().isVector() && VectorWidth) {
3546 12 : EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
3547 12 : IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3548 : }
3549 :
3550 : // If the index is smaller or larger than intptr_t, truncate or extend
3551 : // it.
3552 79618 : IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3553 :
3554 : // If this is a multiply by a power of two, turn it into a shl
3555 : // immediately. This is a very common case.
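     :       // e.g. (hypothetical) indexing an array of i32: IdxN * 4 becomes
     :       // IdxN << 2.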
3556 39809 : if (ElementSize != 1) {
3557 23513 : if (ElementSize.isPowerOf2()) {
3558 : unsigned Amt = ElementSize.logBase2();
3559 21094 : IdxN = DAG.getNode(ISD::SHL, dl,
3560 : N.getValueType(), IdxN,
3561 21094 : DAG.getConstant(Amt, dl, IdxN.getValueType()));
3562 : } else {
3563 4838 : SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3564 2419 : IdxN = DAG.getNode(ISD::MUL, dl,
3565 2419 : N.getValueType(), IdxN, Scale);
3566 : }
3567 : }
3568 :
3569 39809 : N = DAG.getNode(ISD::ADD, dl,
3570 39809 : N.getValueType(), N, IdxN);
3571 : }
3572 : }
3573 :
3574 1614131 : setValue(&I, N);
3575 1614131 : }
3576 :
3577 2094108 : void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3578 : // If this is a fixed sized alloca in the entry block of the function,
3579 : // allocate it statically on the stack.
3580 2094108 : if (FuncInfo.StaticAllocaMap.count(&I))
3581 2093547 : return; // getValue will auto-populate this.
3582 :
3583 561 : SDLoc dl = getCurSDLoc();
3584 561 : Type *Ty = I.getAllocatedType();
3585 561 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3586 561 : auto &DL = DAG.getDataLayout();
3587 561 : uint64_t TySize = DL.getTypeAllocSize(Ty);
3588 : unsigned Align =
3589 769 : std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3590 :
3591 561 : SDValue AllocSize = getValue(I.getArraySize());
3592 :
3593 561 : EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
3594 0 : if (AllocSize.getValueType() != IntPtr)
3595 109 : AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3596 :
3597 561 : AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3598 : AllocSize,
3599 561 : DAG.getConstant(TySize, dl, IntPtr));
3600 :
3601 :   // Handle alignment. If the requested alignment is less than or equal to
3602 :   // the stack alignment, ignore it; if it is greater, we record the larger
3603 :   // alignment in the DYNAMIC_STACKALLOC node.
3604 : unsigned StackAlign =
3605 1122 : DAG.getSubtarget().getFrameLowering()->getStackAlignment();
3606 561 : if (Align <= StackAlign)
3607 : Align = 0;
3608 :
3609 :   // Round the size of the allocation up to the stack alignment size
3610 :   // by adding StackAlign - 1 to it. This doesn't overflow because we're
3611 :   // computing an address inside an alloca.
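     :   // e.g. (hypothetical, 16-byte stack alignment): a 20-byte request
     :   // becomes (20 + 15) & ~15 == 32 bytes.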
3612 : SDNodeFlags Flags;
3613 : Flags.setNoUnsignedWrap(true);
3614 1122 : AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
3615 561 : DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
3616 :
3617 : // Mask out the low bits for alignment purposes.
3618 561 : AllocSize =
3619 561 : DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
3620 561 : DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
3621 :
3622 561 : SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
3623 1122 : SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3624 1122 : SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3625 561 : setValue(&I, DSA);
3626 561 : DAG.setRoot(DSA.getValue(1));
3627 :
3628 : assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
3629 : }
3630 :
3631 2375761 : void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3632 2375761 : if (I.isAtomic())
3633 6294 : return visitAtomicLoad(I);
3634 :
3635 2369523 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3636 : const Value *SV = I.getOperand(0);
3637 2369523 : if (TLI.supportSwiftError()) {
3638 : // Swifterror values can come from either a function parameter with
3639 : // swifterror attribute or an alloca with swifterror attribute.
3640 : if (const Argument *Arg = dyn_cast<Argument>(SV)) {
3641 28555 : if (Arg->hasSwiftErrorAttr())
3642 6 : return visitLoadFromSwiftError(I);
3643 : }
3644 :
3645 : if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
3646 1557017 : if (Alloca->isSwiftError())
3647 47 : return visitLoadFromSwiftError(I);
3648 : }
3649 : }
3650 :
3651 2369470 : SDValue Ptr = getValue(SV);
3652 :
3653 2369470 : Type *Ty = I.getType();
3654 :
3655 : bool isVolatile = I.isVolatile();
3656 : bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3657 : bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
3658 2369470 : bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
3659 : unsigned Alignment = I.getAlignment();
3660 :
3661 : AAMDNodes AAInfo;
3662 2369470 : I.getAAMetadata(AAInfo);
3663 : const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3664 :
3665 : SmallVector<EVT, 4> ValueVTs;
3666 : SmallVector<uint64_t, 4> Offsets;
3667 2369470 : ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3668 2369470 : unsigned NumValues = ValueVTs.size();
3669 2369470 : if (NumValues == 0)
3670 : return;
3671 :
3672 2369467 : SDValue Root;
3673 : bool ConstantMemory = false;
3674 2369467 : if (isVolatile || NumValues > MaxParallelChains)
3675 : // Serialize volatile loads with other side effects.
3676 11605 : Root = getRoot();
3677 2843294 : else if (AA && AA->pointsToConstantMemory(MemoryLocation(
3678 485432 : SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3679 : // Do not serialize (non-volatile) loads of constant memory with anything.
3680 41874 : Root = DAG.getEntryNode();
3681 : ConstantMemory = true;
3682 : } else {
3683 : // Do not serialize non-volatile loads against each other.
3684 2336925 : Root = DAG.getRoot();
3685 : }
3686 :
3687 2369467 : SDLoc dl = getCurSDLoc();
3688 :
3689 2369467 : if (isVolatile)
3690 11605 : Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3691 :
3692 : // An aggregate load cannot wrap around the address space, so offsets to its
3693 : // parts don't wrap either.
3694 : SDNodeFlags Flags;
3695 : Flags.setNoUnsignedWrap(true);
3696 :
3697 2369467 : SmallVector<SDValue, 4> Values(NumValues);
3698 4738934 : SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3699 4738934 : EVT PtrVT = Ptr.getValueType();
3700 : unsigned ChainI = 0;
3701 4765291 : for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3702 : // Serializing loads here may result in excessive register pressure, and
3703 : // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3704 : // could recover a bit by hoisting nodes upward in the chain by recognizing
3705 : // they are side-effect free or do not alias. The optimizer should really
3706 : // avoid this case by converting large object/array copies to llvm.memcpy
3707 :     // (MaxParallelChains should always remain as a failsafe).
3708 2395824 : if (ChainI == MaxParallelChains) {
3709 : assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3710 0 : SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3711 0 : makeArrayRef(Chains.data(), ChainI));
3712 0 : Root = Chain;
3713 : ChainI = 0;
3714 : }
3715 2395824 : SDValue A = DAG.getNode(ISD::ADD, dl,
3716 : PtrVT, Ptr,
3717 2395824 : DAG.getConstant(Offsets[i], dl, PtrVT),
3718 2395824 : Flags);
3719 : auto MMOFlags = MachineMemOperand::MONone;
3720 2395824 : if (isVolatile)
3721 : MMOFlags |= MachineMemOperand::MOVolatile;
3722 2395824 : if (isNonTemporal)
3723 : MMOFlags |= MachineMemOperand::MONonTemporal;
3724 2395824 : if (isInvariant)
3725 : MMOFlags |= MachineMemOperand::MOInvariant;
3726 2395824 : if (isDereferenceable)
3727 : MMOFlags |= MachineMemOperand::MODereferenceable;
3728 2395824 : MMOFlags |= TLI.getMMOFlags(I);
3729 :
3730 2395824 : SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
3731 : MachinePointerInfo(SV, Offsets[i]), Alignment,
3732 7187472 : MMOFlags, AAInfo, Ranges);
3733 :
3734 2395824 : Values[i] = L;
3735 4791648 : Chains[ChainI] = L.getValue(1);
3736 : }
3737 :
3738 2369467 : if (!ConstantMemory) {
3739 2348530 : SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3740 4697060 : makeArrayRef(Chains.data(), ChainI));
3741 2348530 : if (isVolatile)
3742 11605 : DAG.setRoot(Chain);
3743 : else
3744 2336925 : PendingLoads.push_back(Chain);
3745 : }
3746 :
3747 4738934 : setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3748 : DAG.getVTList(ValueVTs), Values));
3749 : }
3750 :
3751 110 : void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
3752 : assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
3753 : "call visitStoreToSwiftError when backend supports swifterror");
3754 :
3755 : SmallVector<EVT, 4> ValueVTs;
3756 : SmallVector<uint64_t, 4> Offsets;
3757 : const Value *SrcV = I.getOperand(0);
3758 110 : ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3759 : SrcV->getType(), ValueVTs, &Offsets);
3760 : assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3761 : "expect a single EVT for swifterror");
3762 :
3763 110 : SDValue Src = getValue(SrcV);
3764 : // Create a virtual register, then update the virtual register.
3765 : unsigned VReg; bool CreatedVReg;
3766 110 : std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
3767 : // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
3768 : // Chain can be getRoot or getControlRoot.
3769 110 : SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
3770 220 : SDValue(Src.getNode(), Src.getResNo()));
3771 110 : DAG.setRoot(CopyNode);
3772 110 : if (CreatedVReg)
3773 71 : FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
3774 110 : }
3775 :
3776 53 : void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
3777 : assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
3778 : "call visitLoadFromSwiftError when backend supports swifterror");
3779 :
3780 : assert(!I.isVolatile() &&
3781 : I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
3782 : I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
3783 : "Support volatile, non temporal, invariant for load_from_swift_error");
3784 :
3785 : const Value *SV = I.getOperand(0);
3786 53 : Type *Ty = I.getType();
3787 : AAMDNodes AAInfo;
3788 53 : I.getAAMetadata(AAInfo);
3789 : assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
3790 : SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
3791 : "load_from_swift_error should not be constant memory");
3792 :
3793 : SmallVector<EVT, 4> ValueVTs;
3794 : SmallVector<uint64_t, 4> Offsets;
3795 53 : ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
3796 : ValueVTs, &Offsets);
3797 : assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3798 : "expect a single EVT for swifterror");
3799 :
3800 : // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
3801 53 : SDValue L = DAG.getCopyFromReg(
3802 106 : getRoot(), getCurSDLoc(),
3803 53 : FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
3804 106 : ValueVTs[0]);
3805 :
3806 53 : setValue(&I, L);
3807 53 : }
3808 :
3809 1963317 : void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3810 1963317 : if (I.isAtomic())
3811 1751 : return visitAtomicStore(I);
3812 :
3813 : const Value *SrcV = I.getOperand(0);
3814 : const Value *PtrV = I.getOperand(1);
3815 :
3816 1961697 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3817 1961697 : if (TLI.supportSwiftError()) {
3818 : // Swifterror values can come from either a function parameter with
3819 : // swifterror attribute or an alloca with swifterror attribute.
3820 : if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
3821 9969 : if (Arg->hasSwiftErrorAttr())
3822 45 : return visitStoreToSwiftError(I);
3823 : }
3824 :
3825 : if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
3826 1360539 : if (Alloca->isSwiftError())
3827 65 : return visitStoreToSwiftError(I);
3828 : }
3829 : }
3830 :
3831 : SmallVector<EVT, 4> ValueVTs;
3832 : SmallVector<uint64_t, 4> Offsets;
3833 1961587 : ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3834 : SrcV->getType(), ValueVTs, &Offsets);
3835 1961587 : unsigned NumValues = ValueVTs.size();
3836 1961587 : if (NumValues == 0)
3837 : return;
3838 :
3839 : // Get the lowered operands. Note that we do this after
3840 : // checking if NumResults is zero, because with zero results
3841 : // the operands won't have values in the map.
3842 1961566 : SDValue Src = getValue(SrcV);
3843 1961566 : SDValue Ptr = getValue(PtrV);
3844 :
3845 1961566 : SDValue Root = getRoot();
3846 3923132 : SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3847 1961566 : SDLoc dl = getCurSDLoc();
3848 3923132 : EVT PtrVT = Ptr.getValueType();
3849 : unsigned Alignment = I.getAlignment();
3850 : AAMDNodes AAInfo;
3851 1961566 : I.getAAMetadata(AAInfo);
3852 :
3853 : auto MMOFlags = MachineMemOperand::MONone;
3854 1961566 : if (I.isVolatile())
3855 : MMOFlags |= MachineMemOperand::MOVolatile;
3856 410791 : if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
3857 : MMOFlags |= MachineMemOperand::MONonTemporal;
3858 1961566 : MMOFlags |= TLI.getMMOFlags(I);
3859 :
3860 :   // An aggregate store cannot wrap around the address space, so offsets to its
3861 : // parts don't wrap either.
3862 : SDNodeFlags Flags;
3863 : Flags.setNoUnsignedWrap(true);
3864 :
3865 : unsigned ChainI = 0;
3866 3930303 : for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3867 : // See visitLoad comments.
3868 1968737 : if (ChainI == MaxParallelChains) {
3869 0 : SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3870 0 : makeArrayRef(Chains.data(), ChainI));
3871 0 : Root = Chain;
3872 : ChainI = 0;
3873 : }
3874 1968737 : SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3875 3937474 : DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
3876 1968737 : SDValue St = DAG.getStore(
3877 1968737 : Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
3878 7874948 : MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
3879 3937474 : Chains[ChainI] = St;
3880 : }
3881 :
3882 1961566 : SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3883 3923132 : makeArrayRef(Chains.data(), ChainI));
3884 1961566 : DAG.setRoot(StoreNode);
3885 : }
3886 :
3887 284 : void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
3888 : bool IsCompressing) {
3889 284 : SDLoc sdl = getCurSDLoc();
3890 :
3891 : auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3892 : unsigned& Alignment) {
3893 : // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3894 : Src0 = I.getArgOperand(0);
3895 : Ptr = I.getArgOperand(1);
3896 : Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3897 : Mask = I.getArgOperand(3);
3898 : };
3899 : auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3900 : unsigned& Alignment) {
3901 : // llvm.masked.compressstore.*(Src0, Ptr, Mask)
3902 128 : Src0 = I.getArgOperand(0);
3903 128 : Ptr = I.getArgOperand(1);
3904 128 : Mask = I.getArgOperand(2);
3905 128 : Alignment = 0;
3906 : };
3907 :
3908 : Value *PtrOperand, *MaskOperand, *Src0Operand;
3909 : unsigned Alignment;
3910 284 : if (IsCompressing)
3911 : getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3912 : else
3913 156 : getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3914 :
3915 284 : SDValue Ptr = getValue(PtrOperand);
3916 284 : SDValue Src0 = getValue(Src0Operand);
3917 284 : SDValue Mask = getValue(MaskOperand);
3918 :
3919 284 : EVT VT = Src0.getValueType();
3920 284 : if (!Alignment)
3921 128 : Alignment = DAG.getEVTAlignment(VT);
3922 :
3923 : AAMDNodes AAInfo;
3924 284 : I.getAAMetadata(AAInfo);
3925 :
3926 : MachineMemOperand *MMO =
3927 284 : DAG.getMachineFunction().
3928 852 : getMachineMemOperand(MachinePointerInfo(PtrOperand),
3929 : MachineMemOperand::MOStore, VT.getStoreSize(),
3930 : Alignment, AAInfo);
3931 284 : SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3932 : MMO, false /* Truncating */,
3933 284 : IsCompressing);
3934 284 : DAG.setRoot(StoreNode);
3935 284 : setValue(&I, StoreNode);
3936 284 : }
3937 :
3938 : // Get a uniform base for the Gather/Scatter intrinsic.
3939 : // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3940 : // We try to represent it as a base pointer + vector of indices.
3941 : // Usually, the vector of pointers comes from a 'getelementptr' instruction.
3942 : // The first operand of the GEP may be a single pointer or a vector of pointers
3943 : // Example:
3944 : // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3945 : // or
3946 : // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
3947 : // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3948 : //
3949 : // When the first GEP operand is a single pointer, it is the uniform base we
3950 : // are looking for. If the first operand of the GEP is a splat vector, we
3951 : // extract the splat value and use it as the uniform base.
3952 : // In all other cases the function returns 'false'.
3953 415 : static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
3954 : SDValue &Scale, SelectionDAGBuilder* SDB) {
3955 415 : SelectionDAG& DAG = SDB->DAG;
3956 415 : LLVMContext &Context = *DAG.getContext();
3957 :
3958 : assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
3959 415 : const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3960 : if (!GEP)
3961 : return false;
3962 :
3963 : const Value *GEPPtr = GEP->getPointerOperand();
3964 496 : if (!GEPPtr->getType()->isVectorTy())
3965 152 : Ptr = GEPPtr;
3966 96 : else if (!(Ptr = getSplatValue(GEPPtr)))
3967 : return false;
3968 :
3969 242 : unsigned FinalIndex = GEP->getNumOperands() - 1;
3970 : Value *IndexVal = GEP->getOperand(FinalIndex);
3971 :
3972 : // Ensure all the other indices are 0.
3973 248 : for (unsigned i = 1; i < FinalIndex; ++i) {
3974 : auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
3975 6 : if (!C || !C->isZero())
3976 : return false;
3977 : }
3978 :
3979 : // The operands of the GEP may be defined in another basic block.
3980 : // In this case we won't find nodes for the operands.
3981 230 : if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
3982 17 : return false;
3983 :
3984 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3985 213 : const DataLayout &DL = DAG.getDataLayout();
3986 213 : Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
3987 639 : SDB->getCurSDLoc(), TLI.getPointerTy(DL));
3988 213 : Base = SDB->getValue(Ptr);
3989 213 : Index = SDB->getValue(IndexVal);
3990 :
3991 639 : if (!Index.getValueType().isVector()) {
3992 6 : unsigned GEPWidth = GEP->getType()->getVectorNumElements();
3993 6 : EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
3994 6 : Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
3995 : }
3996 : return true;
3997 : }
3998 :
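// When getUniformBase succeeds, each lane of the gather/scatter addresses
// Base + Index[i] * Scale, where Scale is the alloc size of the GEP result
// element type. A minimal scalar sketch of that addressing math (an
// illustration only; the helper name is invented):

#include <cstddef>
#include <cstdint>

uintptr_t laneAddress(uintptr_t Base, const int64_t *Index, uint64_t Scale,
                      size_t Lane) {
  return Base + static_cast<uintptr_t>(Index[Lane] * static_cast<int64_t>(Scale));
}
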
3999 97 : void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
4000 97 : SDLoc sdl = getCurSDLoc();
4001 :
4002 : // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
4003 97 : const Value *Ptr = I.getArgOperand(1);
4004 97 : SDValue Src0 = getValue(I.getArgOperand(0));
4005 97 : SDValue Mask = getValue(I.getArgOperand(3));
4006 194 : EVT VT = Src0.getValueType();
4007 97 : unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
4008 97 : if (!Alignment)
4009 0 : Alignment = DAG.getEVTAlignment(VT);
4010 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4011 :
4012 : AAMDNodes AAInfo;
4013 97 : I.getAAMetadata(AAInfo);
4014 :
4015 97 : SDValue Base;
4016 97 : SDValue Index;
4017 97 : SDValue Scale;
4018 97 : const Value *BasePtr = Ptr;
4019 97 : bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
4020 :
4021 97 : const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
4022 97 : MachineMemOperand *MMO = DAG.getMachineFunction().
4023 134 : getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
4024 : MachineMemOperand::MOStore, VT.getStoreSize(),
4025 : Alignment, AAInfo);
4026 97 : if (!UniformBase) {
4027 120 : Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4028 60 : Index = getValue(Ptr);
4029 120 : Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4030 : }
4031 97 : SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
4032 97 : SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
4033 97 : Ops, MMO);
4034 97 : DAG.setRoot(Scatter);
4035 97 : setValue(&I, Scatter);
4036 97 : }
4037 :
4038 500 : void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
4039 500 : SDLoc sdl = getCurSDLoc();
4040 :
4041 : auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
4042 : unsigned& Alignment) {
4043 : // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
4044 : Ptr = I.getArgOperand(0);
4045 : Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
4046 : Mask = I.getArgOperand(2);
4047 : Src0 = I.getArgOperand(3);
4048 : };
4049 : auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
4050 : unsigned& Alignment) {
4051 : // @llvm.masked.expandload.*(Ptr, Mask, Src0)
4052 198 : Ptr = I.getArgOperand(0);
4053 198 : Alignment = 0;
4054 198 : Mask = I.getArgOperand(1);
4055 198 : Src0 = I.getArgOperand(2);
4056 : };
4057 :
4058 : Value *PtrOperand, *MaskOperand, *Src0Operand;
4059 : unsigned Alignment;
4060 500 : if (IsExpanding)
4061 : getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4062 : else
4063 302 : getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4064 :
4065 500 : SDValue Ptr = getValue(PtrOperand);
4066 500 : SDValue Src0 = getValue(Src0Operand);
4067 500 : SDValue Mask = getValue(MaskOperand);
4068 :
4069 500 : EVT VT = Src0.getValueType();
4070 500 : if (!Alignment)
4071 198 : Alignment = DAG.getEVTAlignment(VT);
4072 :
4073 : AAMDNodes AAInfo;
4074 500 : I.getAAMetadata(AAInfo);
4075 : const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4076 :
4077 : // Do not serialize masked loads of constant memory with anything.
4078 1000 : bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
4079 500 : PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
4080 501 : SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
4081 :
4082 : MachineMemOperand *MMO =
4083 500 : DAG.getMachineFunction().
4084 1500 : getMachineMemOperand(MachinePointerInfo(PtrOperand),
4085 : MachineMemOperand::MOLoad, VT.getStoreSize(),
4086 : Alignment, AAInfo, Ranges);
4087 :
4088 500 : SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
4089 500 : ISD::NON_EXTLOAD, IsExpanding);
4090 500 : if (AddToChain)
4091 499 : PendingLoads.push_back(Load.getValue(1));
4092 500 : setValue(&I, Load);
4093 500 : }
4094 :
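// The load-side counterparts mirror the store-side pair above (a scalar
// sketch for illustration only; helper names are invented):

#include <cstddef>

// llvm.masked.load: inactive lanes take the passthru value Src0.
void maskedLoadModel(int *Dst, const int *Ptr, const bool *Mask,
                     const int *Src0, size_t N) {
  for (size_t i = 0; i < N; ++i)
    Dst[i] = Mask[i] ? Ptr[i] : Src0[i];
}

// llvm.masked.expandload: consecutive memory elements are expanded into the
// active lanes, the inverse of compressstore.
void expandLoadModel(int *Dst, const int *Ptr, const bool *Mask,
                     const int *Src0, size_t N) {
  size_t j = 0;
  for (size_t i = 0; i < N; ++i)
    Dst[i] = Mask[i] ? Ptr[j++] : Src0[i];
}
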
4095 318 : void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4096 318 : SDLoc sdl = getCurSDLoc();
4097 :
4098 : // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4099 318 : const Value *Ptr = I.getArgOperand(0);
4100 318 : SDValue Src0 = getValue(I.getArgOperand(3));
4101 318 : SDValue Mask = getValue(I.getArgOperand(2));
4102 :
4103 318 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4104 318 : EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4105 318 : unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
4106 318 : if (!Alignment)
4107 36 : Alignment = DAG.getEVTAlignment(VT);
4108 :
4109 : AAMDNodes AAInfo;
4110 318 : I.getAAMetadata(AAInfo);
4111 : const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4112 :
4113 318 : SDValue Root = DAG.getRoot();
4114 318 : SDValue Base;
4115 318 : SDValue Index;
4116 318 : SDValue Scale;
4117 318 : const Value *BasePtr = Ptr;
4118 318 : bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
4119 : bool ConstantMemory = false;
4120 176 : if (UniformBase &&
4121 480 : AA && AA->pointsToConstantMemory(MemoryLocation(
4122 162 : BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
4123 : AAInfo))) {
4124 : // Do not serialize (non-volatile) loads of constant memory with anything.
4125 10 : Root = DAG.getEntryNode();
4126 : ConstantMemory = true;
4127 : }
4128 :
4129 : MachineMemOperand *MMO =
4130 318 : DAG.getMachineFunction().
4131 636 : getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
4132 : MachineMemOperand::MOLoad, VT.getStoreSize(),
4133 : Alignment, AAInfo, Ranges);
4134 :
4135 318 : if (!UniformBase) {
4136 284 : Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4137 142 : Index = getValue(Ptr);
4138 284 : Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4139 : }
4140 318 : SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
4141 318 : SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4142 318 : Ops, MMO);
4143 :
4144 318 : SDValue OutChain = Gather.getValue(1);
4145 318 : if (!ConstantMemory)
4146 313 : PendingLoads.push_back(OutChain);
4147 318 : setValue(&I, Gather);
4148 318 : }
4149 :
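// Gather and scatter differ from the masked load/store pair in that every
// lane carries its own pointer. A scalar model (illustration only; the
// helper name is invented):

#include <cstddef>

// llvm.masked.gather: active lanes load through their own pointer, inactive
// lanes keep the passthru; llvm.masked.scatter is the store-side analogue.
void gatherModel(int *Dst, int *const *Ptrs, const bool *Mask,
                 const int *Src0, size_t N) {
  for (size_t i = 0; i < N; ++i)
    Dst[i] = Mask[i] ? *Ptrs[i] : Src0[i];
}
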
4150 6128 : void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4151 6128 : SDLoc dl = getCurSDLoc();
4152 : AtomicOrdering SuccessOrder = I.getSuccessOrdering();
4153 : AtomicOrdering FailureOrder = I.getFailureOrdering();
4154 6128 : SyncScope::ID SSID = I.getSyncScopeID();
4155 :
4156 6128 : SDValue InChain = getRoot();
4157 :
4158 6128 : MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
4159 12256 : SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4160 6128 : SDValue L = DAG.getAtomicCmpSwap(
4161 : ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
4162 : getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
4163 : getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
4164 24512 : /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
4165 :
4166 6128 : SDValue OutChain = L.getValue(2);
4167 :
4168 6128 : setValue(&I, L);
4169 6128 : DAG.setRoot(OutChain);
4170 6128 : }
4171 :
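// ATOMIC_CMP_SWAP_WITH_SUCCESS carries three results: the loaded value
// (MemVT), the success flag (i1) and the chain, matching cmpxchg's { iN, i1 }
// aggregate result. A rough C++ model of that contract (illustration only;
// the helper name is invented):

#include <atomic>
#include <utility>

std::pair<int, bool> cmpXchgModel(std::atomic<int> &Ptr, int Expected, int New) {
  bool Success = Ptr.compare_exchange_strong(Expected, New);
  // On failure, compare_exchange_strong writes the observed value back into
  // Expected, so Expected always holds the loaded value here.
  return {Expected, Success};
}
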
4172 8674 : void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4173 8674 : SDLoc dl = getCurSDLoc();
4174 : ISD::NodeType NT;
4175 : switch (I.getOperation()) {
4176 0 : default: llvm_unreachable("Unknown atomicrmw operation");
4177 : case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4178 : case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4179 : case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4180 : case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4181 : case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4182 : case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4183 : case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4184 : case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4185 : case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4186 : case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4187 : case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4188 : }
4189 : AtomicOrdering Order = I.getOrdering();
4190 8674 : SyncScope::ID SSID = I.getSyncScopeID();
4191 :
4192 8674 : SDValue InChain = getRoot();
4193 :
4194 : SDValue L =
4195 8674 : DAG.getAtomic(NT, dl,
4196 8674 : getValue(I.getValOperand()).getSimpleValueType(),
4197 : InChain,
4198 : getValue(I.getPointerOperand()),
4199 : getValue(I.getValOperand()),
4200 : I.getPointerOperand(),
4201 34696 : /* Alignment=*/ 0, Order, SSID);
4202 :
4203 8674 : SDValue OutChain = L.getValue(1);
4204 :
4205 8674 : setValue(&I, L);
4206 8674 : DAG.setRoot(OutChain);
4207 8674 : }
4208 :
4209 375 : void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4210 375 : SDLoc dl = getCurSDLoc();
4211 375 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4212 375 : SDValue Ops[3];
4213 375 : Ops[0] = getRoot();
4214 750 : Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4215 375 : TLI.getFenceOperandTy(DAG.getDataLayout()));
4216 1125 : Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4217 375 : TLI.getFenceOperandTy(DAG.getDataLayout()));
4218 750 : DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4219 375 : }
4220 :
4221 6238 : void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4222 6238 : SDLoc dl = getCurSDLoc();
4223 : AtomicOrdering Order = I.getOrdering();
4224 6238 : SyncScope::ID SSID = I.getSyncScopeID();
4225 :
4226 6238 : SDValue InChain = getRoot();
4227 :
4228 6238 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4229 6238 : EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4230 :
4231 6238 : if (!TLI.supportsUnalignedAtomics() &&
4232 : I.getAlignment() < VT.getStoreSize())
4233 0 : report_fatal_error("Cannot generate unaligned atomic load");
4234 :
4235 : MachineMemOperand *MMO =
4236 6238 : DAG.getMachineFunction().
4237 18714 : getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
4238 : MachineMemOperand::MOVolatile |
4239 : MachineMemOperand::MOLoad,
4240 : VT.getStoreSize(),
4241 : I.getAlignment() ? I.getAlignment() :
4242 0 : DAG.getEVTAlignment(VT),
4243 6238 : AAMDNodes(), nullptr, SSID, Order);
4244 :
4245 6238 : InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4246 : SDValue L =
4247 6238 : DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
4248 6238 : getValue(I.getPointerOperand()), MMO);
4249 :
4250 6238 : SDValue OutChain = L.getValue(1);
4251 :
4252 6238 : setValue(&I, L);
4253 6238 : DAG.setRoot(OutChain);
4254 6238 : }
4255 :
4256 1620 : void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4257 1620 : SDLoc dl = getCurSDLoc();
4258 :
4259 : AtomicOrdering Order = I.getOrdering();
4260 1620 : SyncScope::ID SSID = I.getSyncScopeID();
4261 :
4262 1620 : SDValue InChain = getRoot();
4263 :
4264 1620 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4265 : EVT VT =
4266 1620 : TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
4267 :
4268 1620 : if (I.getAlignment() < VT.getStoreSize())
4269 0 : report_fatal_error("Cannot generate unaligned atomic store");
4270 :
4271 : SDValue OutChain =
4272 1620 : DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
4273 : InChain,
4274 : getValue(I.getPointerOperand()),
4275 : getValue(I.getValueOperand()),
4276 : I.getPointerOperand(), I.getAlignment(),
4277 4860 : Order, SSID);
4278 :
4279 1620 : DAG.setRoot(OutChain);
4280 1620 : }
4281 :
4282 : /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4283 : /// node.
4284 65328 : void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4285 : unsigned Intrinsic) {
4286 : // Ignore the callsite's attributes. A specific call site may be marked with
4287 : // readnone, but the lowering code will expect the chain based on the
4288 : // definition.
4289 : const Function *F = I.getCalledFunction();
4290 65328 : bool HasChain = !F->doesNotAccessMemory();
4291 65328 : bool OnlyLoad = HasChain && F->onlyReadsMemory();
4292 :
4293 : // Build the operand list.
4294 : SmallVector<SDValue, 8> Ops;
4295 65328 : if (HasChain) { // If this intrinsic has side-effects, chainify it.
4296 10969 : if (OnlyLoad) {
4297 : // We don't need to serialize loads against other loads.
4298 5114 : Ops.push_back(DAG.getRoot());
4299 : } else {
4300 8412 : Ops.push_back(getRoot());
4301 : }
4302 : }
4303 :
4304 : // Info is set by getTgtMemIntrinsic.
4305 65328 : TargetLowering::IntrinsicInfo Info;
4306 65328 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4307 130656 : bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
4308 : DAG.getMachineFunction(),
4309 65328 : Intrinsic);
4310 :
4311 : // Add the intrinsic ID as an integer operand unless this lowers to a target-specific memory-intrinsic opcode.
4312 65328 : if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
4313 : Info.opc == ISD::INTRINSIC_W_CHAIN)
4314 130655 : Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4315 130636 : TLI.getPointerTy(DAG.getDataLayout())));
4316 :
4317 : // Add all operands of the call to the operand list.
4318 245055 : for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
4319 114399 : SDValue Op = getValue(I.getArgOperand(i));
4320 114399 : Ops.push_back(Op);
4321 : }
4322 :
4323 : SmallVector<EVT, 4> ValueVTs;
4324 65328 : ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4325 :
4326 65328 : if (HasChain)
4327 10969 : ValueVTs.push_back(MVT::Other);
4328 :
4329 130656 : SDVTList VTs = DAG.getVTList(ValueVTs);
4330 :
4331 : // Create the node.
4332 : SDValue Result;
4333 65328 : if (IsTgtIntrinsic) {
4334 : // This is target intrinsic that touches memory
4335 9504 : Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
4336 : Ops, Info.memVT,
4337 4752 : MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
4338 14256 : Info.flags, Info.size);
4339 60576 : } else if (!HasChain) {
4340 163085 : Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4341 12434 : } else if (!I.getType()->isVoidTy()) {
4342 6678 : Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4343 : } else {
4344 11984 : Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4345 : }
4346 :
4347 65328 : if (HasChain) {
4348 10969 : SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4349 10969 : if (OnlyLoad)
4350 2557 : PendingLoads.push_back(Chain);
4351 : else
4352 8412 : DAG.setRoot(Chain);
4353 : }
4354 :
4355 130656 : if (!I.getType()->isVoidTy()) {
4356 : if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
4357 25242 : EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
4358 75726 : Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
4359 : } else
4360 34804 : Result = lowerRangeToAssertZExt(DAG, I, Result);
4361 :
4362 60046 : setValue(&I, Result);
4363 : }
4364 65328 : }
4365 :
4366 : /// GetSignificand - Get the significand and build it into a floating-point
4367 : /// number with exponent of 1:
4368 : ///
4369 : /// Op = (Op & 0x007fffff) | 0x3f800000;
4370 : ///
4371 : /// where Op is the hexadecimal representation of floating point value.
4372 9 : static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
4373 : SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4374 9 : DAG.getConstant(0x007fffff, dl, MVT::i32));
4375 : SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4376 9 : DAG.getConstant(0x3f800000, dl, MVT::i32));
4377 9 : return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4378 : }
4379 :
4380 : /// GetExponent - Get the exponent:
4381 : ///
4382 : /// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4383 : ///
4384 : /// where Op is the integer bit pattern of the floating-point value.
4385 0 : static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
4386 : const TargetLowering &TLI, const SDLoc &dl) {
4387 : SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4388 0 : DAG.getConstant(0x7f800000, dl, MVT::i32));
4389 : SDValue t1 = DAG.getNode(
4390 : ISD::SRL, dl, MVT::i32, t0,
4391 0 : DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
4392 : SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4393 0 : DAG.getConstant(127, dl, MVT::i32));
4394 0 : return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
4395 : }
4396 :
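// A scalar model of the two helpers above (illustration only; helper names
// are invented). They implement the IEEE-754 split x = M * 2^E with
// M in [1, 2); the log expansions below then use
//   log(x)   = E*log(2)   + log(M),
//   log2(x)  = E          + log2(M),
//   log10(x) = E*log10(2) + log10(M),
// differing only in the constant that scales the exponent.

#include <cstdint>
#include <cstring>

float significandModel(float X) { // returns M in [1, 2)
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits = (Bits & 0x007fffff) | 0x3f800000; // keep the mantissa, force 2^0
  float M;
  std::memcpy(&M, &Bits, sizeof(M));
  return M;
}

float exponentModel(float X) { // returns (float)E
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  return static_cast<float>(
      static_cast<int32_t>((Bits & 0x7f800000) >> 23) - 127);
}
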
4397 : /// getF32Constant - Get 32-bit floating point constant.
4398 97 : static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4399 : const SDLoc &dl) {
4400 194 : return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4401 97 : MVT::f32);
4402 : }
4403 :
4404 9 : static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
4405 : SelectionDAG &DAG) {
4406 : // TODO: What fast-math-flags should be set on the floating-point nodes?
4407 :
4408 : // IntegerPartOfX = (int32_t)t0;
4409 9 : SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4410 :
4411 : // FractionalPartOfX = t0 - (float)IntegerPartOfX;
4412 9 : SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4413 9 : SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4414 :
4415 : // IntegerPartOfX <<= 23;
4416 9 : IntegerPartOfX = DAG.getNode(
4417 : ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4418 : DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
4419 18 : DAG.getDataLayout())));
4420 :
4421 9 : SDValue TwoToFractionalPartOfX;
4422 9 : if (LimitFloatPrecision <= 6) {
4423 : // For floating-point precision of 6:
4424 : //
4425 : // TwoToFractionalPartOfX =
4426 : // 0.997535578f +
4427 : // (0.735607626f + 0.252464424f * x) * x;
4428 : //
4429 : // error 0.0144103317, which is 6 bits
4430 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4431 3 : getF32Constant(DAG, 0x3e814304, dl));
4432 : SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4433 3 : getF32Constant(DAG, 0x3f3c50c8, dl));
4434 3 : SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4435 3 : TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4436 3 : getF32Constant(DAG, 0x3f7f5e7e, dl));
4437 6 : } else if (LimitFloatPrecision <= 12) {
4438 : // For floating-point precision of 12:
4439 : //
4440 : // TwoToFractionalPartOfX =
4441 : // 0.999892986f +
4442 : // (0.696457318f +
4443 : // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4444 : //
4445 : // error 0.000107046256, which is 13 to 14 bits
4446 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4447 3 : getF32Constant(DAG, 0x3da235e3, dl));
4448 : SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4449 3 : getF32Constant(DAG, 0x3e65b8f3, dl));
4450 3 : SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4451 : SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4452 3 : getF32Constant(DAG, 0x3f324b07, dl));
4453 3 : SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4454 3 : TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4455 3 : getF32Constant(DAG, 0x3f7ff8fd, dl));
4456 : } else { // LimitFloatPrecision <= 18
4457 : // For floating-point precision of 18:
4458 : //
4459 : // TwoToFractionalPartOfX =
4460 : // 0.999999982f +
4461 : // (0.693148872f +
4462 : // (0.240227044f +
4463 : // (0.554906021e-1f +
4464 : // (0.961591928e-2f +
4465 : // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4466 : // error 2.47208000*10^(-7), which is better than 18 bits
4467 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4468 3 : getF32Constant(DAG, 0x3924b03e, dl));
4469 : SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4470 3 : getF32Constant(DAG, 0x3ab24b87, dl));
4471 3 : SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4472 : SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4473 3 : getF32Constant(DAG, 0x3c1d8c17, dl));
4474 3 : SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4475 : SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4476 3 : getF32Constant(DAG, 0x3d634a1d, dl));
4477 3 : SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4478 : SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4479 3 : getF32Constant(DAG, 0x3e75fe14, dl));
4480 3 : SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4481 : SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4482 3 : getF32Constant(DAG, 0x3f317234, dl));
4483 3 : SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4484 3 : TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4485 3 : getF32Constant(DAG, 0x3f800000, dl));
4486 : }
4487 :
4488 : // Add the exponent into the result in integer domain.
4489 9 : SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4490 : return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4491 9 : DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
4492 : }
4493 :
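// A scalar model of the sequence above for the 6-bit case (illustration
// only; the helper name is invented): split t0 into integer and fractional
// parts, approximate 2^frac with the minimax polynomial, then add the
// integer part straight into the exponent field. expandExp feeds this with
// t0 = Op * log2(e) and expandPow's 10^x case with t0 = RHS * log2(10).

#include <cstdint>
#include <cstring>

float limitedPrecisionExp2Model(float T0) {
  int32_t IntPart = static_cast<int32_t>(T0); // FP_TO_SINT
  float X = T0 - static_cast<float>(IntPart); // fractional part
  // 2^X ~= 0.997535578f + (0.735607626f + 0.252464424f * X) * X  (~6 bits)
  float Poly = 0.997535578f + (0.735607626f + 0.252464424f * X) * X;
  uint32_t Bits;
  std::memcpy(&Bits, &Poly, sizeof(Bits));
  Bits += static_cast<uint32_t>(IntPart) << 23; // scale the result by 2^IntPart
  float Res;
  std::memcpy(&Res, &Bits, sizeof(Res));
  return Res;
}
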
4494 : /// expandExp - Lower an exp intrinsic. Handles the special sequences for
4495 : /// limited-precision mode.
4496 0 : static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4497 : const TargetLowering &TLI) {
4498 0 : if (Op.getValueType() == MVT::f32 &&
4499 0 : LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4500 :
4501 : // Put the exponent in the right bit position for later addition to the
4502 : // final result:
4503 : //
4504 : // #define LOG2OFe 1.4426950f
4505 : // t0 = Op * LOG2OFe
4506 :
4507 : // TODO: What fast-math-flags should be set here?
4508 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4509 0 : getF32Constant(DAG, 0x3fb8aa3b, dl));
4510 0 : return getLimitedPrecisionExp2(t0, dl, DAG);
4511 : }
4512 :
4513 : // No special expansion.
4514 0 : return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4515 : }
4516 :
4517 : /// expandLog - Lower a log intrinsic. Handles the special sequences for
4518 : /// limited-precision mode.
4519 0 : static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4520 : const TargetLowering &TLI) {
4521 : // TODO: What fast-math-flags should be set on the floating-point nodes?
4522 :
4523 0 : if (Op.getValueType() == MVT::f32 &&
4524 0 : LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4525 0 : SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4526 :
4527 : // Scale the exponent by log(2) [0.69314718f].
4528 0 : SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4529 : SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4530 0 : getF32Constant(DAG, 0x3f317218, dl));
4531 :
4532 : // Get the significand and build it into a floating-point number with
4533 : // exponent of 1.
4534 0 : SDValue X = GetSignificand(DAG, Op1, dl);
4535 :
4536 0 : SDValue LogOfMantissa;
4537 0 : if (LimitFloatPrecision <= 6) {
4538 : // For floating-point precision of 6:
4539 : //
4540 : // LogOfMantissa =
4541 : // -1.1609546f +
4542 : // (1.4034025f - 0.23903021f * x) * x;
4543 : //
4544 : // error 0.0034276066, which is better than 8 bits
4545 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4546 0 : getF32Constant(DAG, 0xbe74c456, dl));
4547 : SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4548 0 : getF32Constant(DAG, 0x3fb3a2b1, dl));
4549 0 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4550 0 : LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4551 0 : getF32Constant(DAG, 0x3f949a29, dl));
4552 0 : } else if (LimitFloatPrecision <= 12) {
4553 : // For floating-point precision of 12:
4554 : //
4555 : // LogOfMantissa =
4556 : // -1.7417939f +
4557 : // (2.8212026f +
4558 : // (-1.4699568f +
4559 : // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4560 : //
4561 : // error 0.000061011436, which is 14 bits
4562 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4563 0 : getF32Constant(DAG, 0xbd67b6d6, dl));
4564 : SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4565 0 : getF32Constant(DAG, 0x3ee4f4b8, dl));
4566 0 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4567 : SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4568 0 : getF32Constant(DAG, 0x3fbc278b, dl));
4569 0 : SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4570 : SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4571 0 : getF32Constant(DAG, 0x40348e95, dl));
4572 0 : SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4573 0 : LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4574 0 : getF32Constant(DAG, 0x3fdef31a, dl));
4575 : } else { // LimitFloatPrecision <= 18
4576 : // For floating-point precision of 18:
4577 : //
4578 : // LogOfMantissa =
4579 : // -2.1072184f +
4580 : // (4.2372794f +
4581 : // (-3.7029485f +
4582 : // (2.2781945f +
4583 : // (-0.87823314f +
4584 : // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
4585 : //
4586 : // error 0.0000023660568, which is better than 18 bits
4587 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4588 0 : getF32Constant(DAG, 0xbc91e5ac, dl));
4589 : SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4590 0 : getF32Constant(DAG, 0x3e4350aa, dl));
4591 0 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4592 : SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4593 0 : getF32Constant(DAG, 0x3f60d3e3, dl));
4594 0 : SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4595 : SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4596 0 : getF32Constant(DAG, 0x4011cdf0, dl));
4597 0 : SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4598 : SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4599 0 : getF32Constant(DAG, 0x406cfd1c, dl));
4600 0 : SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4601 : SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4602 0 : getF32Constant(DAG, 0x408797cb, dl));
4603 0 : SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4604 0 : LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4605 0 : getF32Constant(DAG, 0x4006dcab, dl));
4606 : }
4607 :
4608 0 : return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4609 : }
4610 :
4611 : // No special expansion.
4612 0 : return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
4613 : }
4614 :
4615 : /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4616 : /// limited-precision mode.
4617 0 : static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4618 : const TargetLowering &TLI) {
4619 : // TODO: What fast-math-flags should be set on the floating-point nodes?
4620 :
4621 0 : if (Op.getValueType() == MVT::f32 &&
4622 0 : LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4623 0 : SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4624 :
4625 : // Get the exponent.
4626 0 : SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
4627 :
4628 : // Get the significand and build it into a floating-point number with
4629 : // exponent of 1.
4630 0 : SDValue X = GetSignificand(DAG, Op1, dl);
4631 :
4632 : // Different possible minimax approximations of significand in
4633 : // floating-point for various degrees of accuracy over [1,2].
4634 0 : SDValue Log2ofMantissa;
4635 0 : if (LimitFloatPrecision <= 6) {
4636 : // For floating-point precision of 6:
4637 : //
4638 : // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
4639 : //
4640 : // error 0.0049451742, which is more than 7 bits
4641 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4642 0 : getF32Constant(DAG, 0xbeb08fe0, dl));
4643 : SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4644 0 : getF32Constant(DAG, 0x40019463, dl));
4645 0 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4646 0 : Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4647 0 : getF32Constant(DAG, 0x3fd6633d, dl));
4648 0 : } else if (LimitFloatPrecision <= 12) {
4649 : // For floating-point precision of 12:
4650 : //
4651 : // Log2ofMantissa =
4652 : // -2.51285454f +
4653 : // (4.07009056f +
4654 : // (-2.12067489f +
4655 : // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4656 : //
4657 : // error 0.0000876136000, which is better than 13 bits
4658 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4659 0 : getF32Constant(DAG, 0xbda7262e, dl));
4660 : SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4661 0 : getF32Constant(DAG, 0x3f25280b, dl));
4662 0 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4663 : SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4664 0 : getF32Constant(DAG, 0x4007b923, dl));
4665 0 : SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4666 : SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4667 0 : getF32Constant(DAG, 0x40823e2f, dl));
4668 0 : SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4669 0 : Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4670 0 : getF32Constant(DAG, 0x4020d29c, dl));
4671 : } else { // LimitFloatPrecision <= 18
4672 : // For floating-point precision of 18:
4673 : //
4674 : // Log2ofMantissa =
4675 : // -3.0400495f +
4676 : // (6.1129976f +
4677 : // (-5.3420409f +
4678 : // (3.2865683f +
4679 : // (-1.2669343f +
4680 : // (0.27515199f -
4681 : // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4682 : //
4683 : // error 0.0000018516, which is better than 18 bits
4684 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4685 0 : getF32Constant(DAG, 0xbcd2769e, dl));
4686 : SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4687 0 : getF32Constant(DAG, 0x3e8ce0b9, dl));
4688 0 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4689 : SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4690 0 : getF32Constant(DAG, 0x3fa22ae7, dl));
4691 0 : SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4692 : SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4693 0 : getF32Constant(DAG, 0x40525723, dl));
4694 0 : SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4695 : SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4696 0 : getF32Constant(DAG, 0x40aaf200, dl));
4697 0 : SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4698 : SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4699 0 : getF32Constant(DAG, 0x40c39dad, dl));
4700 0 : SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4701 0 : Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4702 0 : getF32Constant(DAG, 0x4042902c, dl));
4703 : }
4704 :
4705 0 : return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4706 : }
4707 :
4708 : // No special expansion.
4709 0 : return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
4710 : }
4711 :
4712 : /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4713 : /// limited-precision mode.
4714 0 : static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4715 : const TargetLowering &TLI) {
4716 : // TODO: What fast-math-flags should be set on the floating-point nodes?
4717 :
4718 0 : if (Op.getValueType() == MVT::f32 &&
4719 0 : LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4720 0 : SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4721 :
4722 : // Scale the exponent by log10(2) [0.30102999f].
4723 0 : SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4724 : SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4725 0 : getF32Constant(DAG, 0x3e9a209a, dl));
4726 :
4727 : // Get the significand and build it into a floating-point number with
4728 : // exponent of 1.
4729 0 : SDValue X = GetSignificand(DAG, Op1, dl);
4730 :
4731 0 : SDValue Log10ofMantissa;
4732 0 : if (LimitFloatPrecision <= 6) {
4733 : // For floating-point precision of 6:
4734 : //
4735 : // Log10ofMantissa =
4736 : // -0.50419619f +
4737 : // (0.60948995f - 0.10380950f * x) * x;
4738 : //
4739 : // error 0.0014886165, which is 6 bits
4740 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4741 0 : getF32Constant(DAG, 0xbdd49a13, dl));
4742 : SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4743 0 : getF32Constant(DAG, 0x3f1c0789, dl));
4744 0 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4745 0 : Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4746 0 : getF32Constant(DAG, 0x3f011300, dl));
4747 0 : } else if (LimitFloatPrecision <= 12) {
4748 : // For floating-point precision of 12:
4749 : //
4750 : // Log10ofMantissa =
4751 : // -0.64831180f +
4752 : // (0.91751397f +
4753 : // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4754 : //
4755 : // error 0.00019228036, which is better than 12 bits
4756 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4757 0 : getF32Constant(DAG, 0x3d431f31, dl));
4758 : SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4759 0 : getF32Constant(DAG, 0x3ea21fb2, dl));
4760 0 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4761 : SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4762 0 : getF32Constant(DAG, 0x3f6ae232, dl));
4763 0 : SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4764 0 : Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4765 0 : getF32Constant(DAG, 0x3f25f7c3, dl));
4766 : } else { // LimitFloatPrecision <= 18
4767 : // For floating-point precision of 18:
4768 : //
4769 : // Log10ofMantissa =
4770 : // -0.84299375f +
4771 : // (1.5327582f +
4772 : // (-1.0688956f +
4773 : // (0.49102474f +
4774 : // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4775 : //
4776 : // error 0.0000037995730, which is better than 18 bits
4777 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4778 0 : getF32Constant(DAG, 0x3c5d51ce, dl));
4779 : SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4780 0 : getF32Constant(DAG, 0x3e00685a, dl));
4781 0 : SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4782 : SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4783 0 : getF32Constant(DAG, 0x3efb6798, dl));
4784 0 : SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4785 : SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4786 0 : getF32Constant(DAG, 0x3f88d192, dl));
4787 0 : SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4788 : SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4789 0 : getF32Constant(DAG, 0x3fc4316c, dl));
4790 0 : SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4791 0 : Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4792 0 : getF32Constant(DAG, 0x3f57ce70, dl));
4793 : }
4794 :
4795 0 : return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4796 : }
4797 :
4798 : // No special expansion.
4799 0 : return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
4800 : }
4801 :
4802 : /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4803 : /// limited-precision mode.
4804 0 : static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4805 : const TargetLowering &TLI) {
4806 0 : if (Op.getValueType() == MVT::f32 &&
4807 0 : LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
4808 0 : return getLimitedPrecisionExp2(Op, dl, DAG);
4809 :
4810 : // No special expansion.
4811 0 : return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4812 : }
4813 :
4814 : /// visitPow - Lower a pow intrinsic. Handles the special sequences for
4815 : /// limited-precision mode with x == 10.0f.
4816 0 : static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
4817 : SelectionDAG &DAG, const TargetLowering &TLI) {
4818 : bool IsExp10 = false;
4819 0 : if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
4820 0 : LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4821 : if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
4822 0 : APFloat Ten(10.0f);
4823 0 : IsExp10 = LHSC->isExactlyValue(Ten);
4824 : }
4825 : }
4826 :
4827 : // TODO: What fast-math-flags should be set on the FMUL node?
4828 0 : if (IsExp10) {
4829 : // Put the exponent in the right bit position for later addition to the
4830 : // final result:
4831 : //
4832 : // #define LOG2OF10 3.3219281f
4833 : // t0 = Op * LOG2OF10;
4834 : SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4835 0 : getF32Constant(DAG, 0x40549a78, dl));
4836 0 : return getLimitedPrecisionExp2(t0, dl, DAG);
4837 : }
4838 :
4839 : // No special expansion.
4840 0 : return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4841 : }
4842 :
4843 : /// ExpandPowI - Expand a llvm.powi intrinsic.
4844 106 : static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
4845 : SelectionDAG &DAG) {
4846 : // If RHS is a constant, we can expand this out to a multiplication tree,
4847 : // otherwise we end up lowering to a call to __powidf2 (for example). When
4848 : // optimizing for size, we only want to do this if the expansion would produce
4849 : // a small number of multiplies; otherwise we fall back to the libcall.
4850 : if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4851 : // Get the exponent as a positive value.
4852 12 : unsigned Val = RHSC->getSExtValue();
4853 12 : if ((int)Val < 0) Val = -Val;
4854 :
4855 : // powi(x, 0) -> 1.0
4856 12 : if (Val == 0)
4857 0 : return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4858 :
4859 12 : const Function &F = DAG.getMachineFunction().getFunction();
4860 12 : if (!F.optForSize() ||
4861 : // If optimizing for size, don't insert too many multiplies.
4862 : // This inserts up to 5 multiplies.
4863 2 : countPopulation(Val) + Log2_32(Val) < 7) {
4864 : // We use the simple binary decomposition method to generate the multiply
4865 : // sequence. There are more optimal ways to do this (for example,
4866 : // powi(x,15) generates one more multiply than it should), but this has
4867 : // the benefit of being both really simple and much better than a libcall.
4868 : SDValue Res; // Logically starts equal to 1.0
4869 10 : SDValue CurSquare = LHS;
4870 : // TODO: Intrinsics should have fast-math-flags that propagate to these
4871 : // nodes.
4872 37 : while (Val) {
4873 27 : if (Val & 1) {
4874 14 : if (Res.getNode())
4875 4 : Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
4876 : else
4877 10 : Res = CurSquare; // 1.0*CurSquare.
4878 : }
4879 :
4880 27 : CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4881 27 : CurSquare, CurSquare);
4882 27 : Val >>= 1;
4883 : }
4884 :
4885 : // If the original was negative, invert the result, producing 1/(x*x*x).
4886 20 : if (RHSC->getSExtValue() < 0)
4887 1 : Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4888 1 : DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4889 10 : return Res;
4890 : }
4891 : }
4892 :
4893 : // Otherwise, expand to a libcall.
4894 96 : return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4895 : }
4896 :
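// A scalar model of the binary decomposition above (illustration only; the
// helper name is invented). For powi(x, 13), bits 1101 pick up x^1, x^4 and
// x^8 using three squarings; under optForSize, popcount(13) + log2(13) =
// 3 + 3 = 6 < 7, so the expansion is still taken.

double powiModel(double X, int N) {
  unsigned Val = N < 0 ? 0u - static_cast<unsigned>(N) : static_cast<unsigned>(N);
  double Res = 1.0; // the DAG version elides this initial 1.0 * x multiply
  double CurSquare = X;
  while (Val) {
    if (Val & 1)
      Res *= CurSquare;
    CurSquare *= CurSquare;
    Val >>= 1;
  }
  return N < 0 ? 1.0 / Res : Res;
}
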
4897 : // getUnderlyingArgReg - Find the underlying register used for a truncated or
4898 : // bitcasted argument.
4899 : static unsigned getUnderlyingArgReg(const SDValue &N) {
4900 48448 : switch (N.getOpcode()) {
4901 23302 : case ISD::CopyFromReg:
4902 23302 : return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4903 628 : case ISD::BITCAST:
4904 : case ISD::AssertZext:
4905 : case ISD::AssertSext:
4906 : case ISD::TRUNCATE:
4907 : return getUnderlyingArgReg(N.getOperand(0));
4908 : default:
4909 : return 0;
4910 : }
4911 : }
4912 :
4913 : /// If the DbgValueInst is a dbg_value of a function argument, create the
4914 : /// corresponding DBG_VALUE machine instruction for it now. At the end of
4915 : /// instruction selection, they will be inserted into the entry BB.
4916 78350 : bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4917 : const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4918 : DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
4919 : const Argument *Arg = dyn_cast<Argument>(V);
4920 : if (!Arg)
4921 : return false;
4922 :
4923 23622 : MachineFunction &MF = DAG.getMachineFunction();
4924 23622 : const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4925 :
4926 : bool IsIndirect = false;
4927 : Optional<MachineOperand> Op;
4928 : // Some arguments' frame indices are recorded during argument lowering.
4929 23622 : int FI = FuncInfo.getArgumentFrameIndex(Arg);
4930 23622 : if (FI != std::numeric_limits<int>::max())
4931 9 : Op = MachineOperand::CreateFI(FI);
4932 :
4933 23622 : if (!Op && N.getNode()) {
4934 : unsigned Reg = getUnderlyingArgReg(N);
4935 23596 : if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4936 23302 : MachineRegisterInfo &RegInfo = MF.getRegInfo();
4937 23302 : unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4938 23302 : if (PR)
4939 : Reg = PR;
4940 : }
4941 23596 : if (Reg) {
4942 : Op = MachineOperand::CreateReg(Reg, false);
4943 : IsIndirect = IsDbgDeclare;
4944 : }
4945 : }
4946 :
4947 23622 : if (!Op && N.getNode())
4948 : // Check if frame index is available.
4949 : if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
4950 : if (FrameIndexSDNode *FINode =
4951 267 : dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
4952 534 : Op = MachineOperand::CreateFI(FINode->getIndex());
4953 :
4954 23622 : if (!Op) {
4955 : // Check if ValueMap has reg number.
4956 44 : DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4957 88 : if (VMI != FuncInfo.ValueMap.end()) {
4958 30 : const auto &TLI = DAG.getTargetLoweringInfo();
4959 30 : RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
4960 57 : V->getType(), getABIRegCopyCC(V));
4961 30 : if (RFV.occupiesMultipleRegs()) {
4962 : unsigned Offset = 0;
4963 15 : for (auto RegAndSize : RFV.getRegsAndSizes()) {
4964 : Op = MachineOperand::CreateReg(RegAndSize.first, false);
4965 : auto FragmentExpr = DIExpression::createFragmentExpression(
4966 9 : Expr, Offset, RegAndSize.second);
4967 9 : if (!FragmentExpr)
4968 : continue;
4969 18 : FuncInfo.ArgDbgValues.push_back(
4970 9 : BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
4971 18 : Op->getReg(), Variable, *FragmentExpr));
4972 9 : Offset += RegAndSize.second;
4973 : }
4974 3 : return true;
4975 : }
4976 27 : Op = MachineOperand::CreateReg(VMI->second, false);
4977 : IsIndirect = IsDbgDeclare;
4978 : }
4979 : }
4980 :
4981 23619 : if (!Op)
4982 : return false;
4983 :
4984 : assert(Variable->isValidLocationForIntrinsic(DL) &&
4985 : "Expected inlined-at fields to agree");
4986 23605 : IsIndirect = (Op->isReg()) ? IsIndirect : true;
4987 47210 : FuncInfo.ArgDbgValues.push_back(
4988 23605 : BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
4989 47210 : *Op, Variable, Expr));
4990 :
4991 23605 : return true;
4992 : }
4993 :
4994 : /// Return the appropriate SDDbgValue based on N.
4995 54739 : SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
4996 : DILocalVariable *Variable,
4997 : DIExpression *Expr,
4998 : const DebugLoc &dl,
4999 : unsigned DbgSDNodeOrder) {
5000 : if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
5001 : // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
5002 : // stack slot locations.
5003 : //
5004 : // Consider "int x = 0; int *px = &x;". There are two kinds of interesting
5005 : // debug values here after optimization:
5006 : //
5007 : // dbg.value(i32* %px, !"int *px", !DIExpression()), and
5008 : // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
5009 : //
5010 : // Both describe the direct values of their associated variables.
5011 16648 : return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
5012 16648 : /*IsIndirect*/ false, dl, DbgSDNodeOrder);
5013 : }
5014 38091 : return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
5015 38091 : /*IsIndirect*/ false, dl, DbgSDNodeOrder);
5016 : }
5017 :
5018 : // Visual Studio defines setjmp as _setjmp
5019 : #if defined(_MSC_VER) && defined(setjmp) && \
5020 : !defined(setjmp_undefined_for_msvc)
5021 : # pragma push_macro("setjmp")
5022 : # undef setjmp
5023 : # define setjmp_undefined_for_msvc
5024 : #endif
5025 :
5026 : /// Lower the call to the specified intrinsic function. If we want to emit this
5027 : /// as a call to a named external function, return the name. Otherwise, lower it
5028 : /// and return null.
5029 : const char *
5030 543254 : SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
5031 543254 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5032 543254 : SDLoc sdl = getCurSDLoc();
5033 543254 : DebugLoc dl = getCurDebugLoc();
5034 : SDValue Res;
5035 :
5036 543254 : switch (Intrinsic) {
5037 65199 : default:
5038 : // By default, turn this into a target intrinsic node.
5039 65199 : visitTargetIntrinsic(I, Intrinsic);
5040 65199 : return nullptr;
5041 260 : case Intrinsic::vastart: visitVAStart(I); return nullptr;
5042 194 : case Intrinsic::vaend: visitVAEnd(I); return nullptr;
5043 21 : case Intrinsic::vacopy: visitVACopy(I); return nullptr;
5044 81 : case Intrinsic::returnaddress:
5045 162 : setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
5046 81 : TLI.getPointerTy(DAG.getDataLayout()),
5047 81 : getValue(I.getArgOperand(0))));
5048 81 : return nullptr;
5049 6 : case Intrinsic::addressofreturnaddress:
5050 12 : setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
5051 : TLI.getPointerTy(DAG.getDataLayout())));
5052 6 : return nullptr;
5053 120 : case Intrinsic::frameaddress:
5054 240 : setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
5055 120 : TLI.getPointerTy(DAG.getDataLayout()),
5056 120 : getValue(I.getArgOperand(0))));
5057 120 : return nullptr;
5058 297 : case Intrinsic::read_register: {
5059 297 : Value *Reg = I.getArgOperand(0);
5060 297 : SDValue Chain = getRoot();
5061 : SDValue RegName =
5062 297 : DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
5063 297 : EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5064 297 : Res = DAG.getNode(ISD::READ_REGISTER, sdl,
5065 297 : DAG.getVTList(VT, MVT::Other), Chain, RegName);
5066 297 : setValue(&I, Res);
5067 297 : DAG.setRoot(Res.getValue(1));
5068 : return nullptr;
5069 : }
5070 179 : case Intrinsic::write_register: {
5071 179 : Value *Reg = I.getArgOperand(0);
5072 : Value *RegValue = I.getArgOperand(1);
5073 179 : SDValue Chain = getRoot();
5074 : SDValue RegName =
5075 179 : DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
5076 179 : DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
5077 179 : RegName, getValue(RegValue)));
5078 : return nullptr;
5079 : }
5080 0 : case Intrinsic::setjmp:
5081 0 : return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
5082 0 : case Intrinsic::longjmp:
5083 0 : return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
5084 : case Intrinsic::memcpy: {
5085 : const auto &MCI = cast<MemCpyInst>(I);
5086 99387 : SDValue Op1 = getValue(I.getArgOperand(0));
5087 99387 : SDValue Op2 = getValue(I.getArgOperand(1));
5088 99387 : SDValue Op3 = getValue(I.getArgOperand(2));
5089 : // @llvm.memcpy defines 0 and 1 to both mean no alignment.
5090 99387 : unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1);
5091 99538 : unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1);
5092 99387 : unsigned Align = MinAlign(DstAlign, SrcAlign);
5093 99387 : bool isVol = MCI.isVolatile();
5094 99387 : bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5095 : // FIXME: Support passing different dest/src alignments to the memcpy DAG
5096 : // node.
5097 99387 : SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
5098 : false, isTC,
5099 : MachinePointerInfo(I.getArgOperand(0)),
5100 99387 : MachinePointerInfo(I.getArgOperand(1)));
5101 99387 : updateDAGForMaybeTailCall(MC);
5102 : return nullptr;
5103 : }
5104 : case Intrinsic::memset: {
5105 : const auto &MSI = cast<MemSetInst>(I);
5106 155847 : SDValue Op1 = getValue(I.getArgOperand(0));
5107 155847 : SDValue Op2 = getValue(I.getArgOperand(1));
5108 155847 : SDValue Op3 = getValue(I.getArgOperand(2));
5109 : // @llvm.memset defines 0 and 1 to both mean no alignment.
5110 155847 : unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
5111 155847 : bool isVol = MSI.isVolatile();
5112 155847 : bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5113 155847 : SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
5114 155847 : isTC, MachinePointerInfo(I.getArgOperand(0)));
5115 155847 : updateDAGForMaybeTailCall(MS);
5116 : return nullptr;
5117 : }
5118 : case Intrinsic::memmove: {
5119 : const auto &MMI = cast<MemMoveInst>(I);
5120 3439 : SDValue Op1 = getValue(I.getArgOperand(0));
5121 3439 : SDValue Op2 = getValue(I.getArgOperand(1));
5122 3439 : SDValue Op3 = getValue(I.getArgOperand(2));
5123 : // @llvm.memmove defines 0 and 1 to both mean no alignment.
5124 3439 : unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1);
5125 3450 : unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1);
5126 3439 : unsigned Align = MinAlign(DstAlign, SrcAlign);
5127 3439 : bool isVol = MMI.isVolatile();
5128 3439 : bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5129 : // FIXME: Support passing different dest/src alignments to the memmove DAG
5130 : // node.
5131 3439 : SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
5132 : isTC, MachinePointerInfo(I.getArgOperand(0)),
5133 3439 : MachinePointerInfo(I.getArgOperand(1)));
5134 3439 : updateDAGForMaybeTailCall(MM);
5135 : return nullptr;
5136 : }
5137 : case Intrinsic::memcpy_element_unordered_atomic: {
5138 : const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
5139 6 : SDValue Dst = getValue(MI.getRawDest());
5140 6 : SDValue Src = getValue(MI.getRawSource());
5141 6 : SDValue Length = getValue(MI.getLength());
5142 :
5143 : unsigned DstAlign = MI.getDestAlignment();
5144 : unsigned SrcAlign = MI.getSourceAlignment();
5145 6 : Type *LengthTy = MI.getLength()->getType();
5146 : unsigned ElemSz = MI.getElementSizeInBytes();
5147 6 : bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5148 6 : SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src,
5149 : SrcAlign, Length, LengthTy, ElemSz, isTC,
5150 : MachinePointerInfo(MI.getRawDest()),
5151 6 : MachinePointerInfo(MI.getRawSource()));
5152 6 : updateDAGForMaybeTailCall(MC);
5153 : return nullptr;
5154 : }
5155 : case Intrinsic::memmove_element_unordered_atomic: {
5156 : auto &MI = cast<AtomicMemMoveInst>(I);
5157 6 : SDValue Dst = getValue(MI.getRawDest());
5158 6 : SDValue Src = getValue(MI.getRawSource());
5159 6 : SDValue Length = getValue(MI.getLength());
5160 :
5161 : unsigned DstAlign = MI.getDestAlignment();
5162 : unsigned SrcAlign = MI.getSourceAlignment();
5163 6 : Type *LengthTy = MI.getLength()->getType();
5164 : unsigned ElemSz = MI.getElementSizeInBytes();
5165 6 : bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5166 6 : SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src,
5167 : SrcAlign, Length, LengthTy, ElemSz, isTC,
5168 : MachinePointerInfo(MI.getRawDest()),
5169 6 : MachinePointerInfo(MI.getRawSource()));
5170 6 : updateDAGForMaybeTailCall(MC);
5171 : return nullptr;
5172 : }
5173 : case Intrinsic::memset_element_unordered_atomic: {
5174 : auto &MI = cast<AtomicMemSetInst>(I);
5175 6 : SDValue Dst = getValue(MI.getRawDest());
5176 6 : SDValue Val = getValue(MI.getValue());
5177 6 : SDValue Length = getValue(MI.getLength());
5178 :
5179 : unsigned DstAlign = MI.getDestAlignment();
5180 6 : Type *LengthTy = MI.getLength()->getType();
5181 : unsigned ElemSz = MI.getElementSizeInBytes();
5182 6 : bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5183 6 : SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length,
5184 : LengthTy, ElemSz, isTC,
5185 6 : MachinePointerInfo(MI.getRawDest()));
5186 6 : updateDAGForMaybeTailCall(MC);
5187 : return nullptr;
5188 : }
5189 : case Intrinsic::dbg_addr:
5190 : case Intrinsic::dbg_declare: {
5191 : const auto &DI = cast<DbgVariableIntrinsic>(I);
5192 : DILocalVariable *Variable = DI.getVariable();
5193 : DIExpression *Expression = DI.getExpression();
5194 674 : dropDanglingDebugInfo(Variable, Expression);
5195 : assert(Variable && "Missing variable");
5196 :
5197 : // Check if address has undef value.
5198 674 : const Value *Address = DI.getVariableLocation();
5199 674 : if (!Address || isa<UndefValue>(Address) ||
5200 605 : (Address->use_empty() && !isa<Argument>(Address))) {
5201 : LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
5202 : return nullptr;
5203 : }
5204 :
5205 572 : bool isParameter = Variable->isParameter() || isa<Argument>(Address);
5206 :
5207 : // Check if this variable can be described by a frame index, typically
5208 : // either as a static alloca or a byval parameter.
5209 : int FI = std::numeric_limits<int>::max();
5210 : if (const auto *AI =
5211 572 : dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
5212 482 : if (AI->isStaticAlloca()) {
5213 461 : auto I = FuncInfo.StaticAllocaMap.find(AI);
5214 922 : if (I != FuncInfo.StaticAllocaMap.end())
5215 461 : FI = I->second;
5216 : }
5217 90 : } else if (const auto *Arg = dyn_cast<Argument>(
5218 : Address->stripInBoundsConstantOffsets())) {
5219 81 : FI = FuncInfo.getArgumentFrameIndex(Arg);
5220 : }
5221 :
5222 : // llvm.dbg.addr is control dependent and always generates indirect
5223 : // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
5224 : // the MachineFunction variable table.
5225 542 : if (FI != std::numeric_limits<int>::max()) {
5226 478 : if (Intrinsic == Intrinsic::dbg_addr) {
5227 3 : SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
5228 : Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
5229 3 : DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
5230 : }
5231 478 : return nullptr;
5232 : }
5233 :
5234 94 : SDValue &N = NodeMap[Address];
5235 94 : if (!N.getNode() && isa<Argument>(Address))
5236 : // Check unused arguments map.
5237 17 : N = UnusedArgNodeMap[Address];
5238 : SDDbgValue *SDV;
5239 94 : if (N.getNode()) {
5240 75 : if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
5241 2 : Address = BCI->getOperand(0);
5242 : // Parameters are handled specially.
5243 : auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
5244 75 : if (isParameter && FINode) {
5245 : // Byval parameter. We have a frame index at this point.
5246 : SDV =
5247 0 : DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
5248 : /*IsIndirect*/ true, dl, SDNodeOrder);
5249 150 : } else if (isa<Argument>(Address)) {
5250 : // Address is an argument, so try to emit its dbg value using
5251 : // virtual register info from the FuncInfo.ValueMap.
5252 47 : EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
5253 47 : return nullptr;
5254 : } else {
5255 28 : SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
5256 : true, dl, SDNodeOrder);
5257 : }
5258 28 : DAG.AddDbgValue(SDV, N.getNode(), isParameter);
5259 : } else {
5260 : // If Address is an argument then try to emit its dbg value using
5261 : // virtual register info from the FuncInfo.ValueMap.
5262 19 : if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
5263 : N)) {
5264 : LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
5265 : }
5266 : }
5267 : return nullptr;
5268 : }
5269 : case Intrinsic::dbg_label: {
5270 : const DbgLabelInst &DI = cast<DbgLabelInst>(I);
5271 : DILabel *Label = DI.getLabel();
5272 : assert(Label && "Missing label");
5273 :
5274 : SDDbgLabel *SDV;
5275 1 : SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
5276 1 : DAG.AddDbgLabel(SDV);
5277 1 : return nullptr;
5278 : }
5279 : case Intrinsic::dbg_value: {
5280 : const DbgValueInst &DI = cast<DbgValueInst>(I);
5281 : assert(DI.getVariable() && "Missing variable");
5282 :
5283 : DILocalVariable *Variable = DI.getVariable();
5284 : DIExpression *Expression = DI.getExpression();
5285 122834 : dropDanglingDebugInfo(Variable, Expression);
5286 122834 : const Value *V = DI.getValue();
5287 122834 : if (!V)
5288 : return nullptr;
5289 :
5290 : SDDbgValue *SDV;
5291 119200 : if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
5292 25834 : SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
5293 25834 : DAG.AddDbgValue(SDV, nullptr, false);
5294 25834 : return nullptr;
5295 : }
5296 :
5297 : // Do not use getValue() in here; we don't want to generate code at
5298 : // this point if it hasn't been done yet.
5299 93366 : SDValue N = NodeMap[V];
5300 93366 : if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
5301 15574 : N = UnusedArgNodeMap[V];
5302 93366 : if (N.getNode()) {
5303 55711 : if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N))
5304 : return nullptr;
5305 44676 : SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder);
5306 44676 : DAG.AddDbgValue(SDV, N.getNode(), false);
5307 44676 : return nullptr;
5308 : }
5309 :
5310 : // PHI nodes have already been selected, so we should know which VReg it
5311 : // is assigned to already.
5312 37655 : if (isa<PHINode>(V)) {
5313 10688 : auto VMI = FuncInfo.ValueMap.find(V);
5314 21376 : if (VMI != FuncInfo.ValueMap.end()) {
5315 10688 : unsigned Reg = VMI->second;
5316 : // The PHI node may be split up into several MI PHI nodes (in
5317 : // FunctionLoweringInfo::set).
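: // Illustrative example (assumption, 32-bit target): an i64 PHI is split
: // across two 32-bit VRegs, so the multi-register path below emits one
: // DBG_VALUE per register with a DW_OP_LLVM_fragment covering its 32 bits.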
5318 10688 : RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
5319 21376 : V->getType(), None);
5320 10688 : if (RFV.occupiesMultipleRegs()) {
5321 : unsigned Offset = 0;
5322 : unsigned BitsToDescribe = 0;
5323 7 : if (auto VarSize = Variable->getSizeInBits())
5324 7 : BitsToDescribe = *VarSize;
5325 7 : if (auto Fragment = Expression->getFragmentInfo())
5326 2 : BitsToDescribe = Fragment->SizeInBits;
5327 29 : for (auto RegAndSize : RFV.getRegsAndSizes()) {
5328 : unsigned RegisterSize = RegAndSize.second;
5329 : // Bail out if all bits are described already.
5330 16 : if (Offset >= BitsToDescribe)
5331 : break;
5332 15 : unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
5333 15 : ? BitsToDescribe - Offset
5334 : : RegisterSize;
5335 : auto FragmentExpr = DIExpression::createFragmentExpression(
5336 15 : Expression, Offset, FragmentSize);
5337 15 : if (!FragmentExpr)
5338 : continue;
5339 15 : SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first,
5340 : false, dl, SDNodeOrder);
5341 15 : DAG.AddDbgValue(SDV, nullptr, false);
5342 : Offset += RegisterSize;
5343 : }
5344 : } else {
5345 10681 : SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl,
5346 : SDNodeOrder);
5347 10681 : DAG.AddDbgValue(SDV, nullptr, false);
5348 : }
5349 : return nullptr;
5350 : }
5351 : }
5352 :
5353 : // TODO: When we get here we will either drop the dbg.value completely, or
5354 : // try to move it forward by letting it dangle for a while. So we should
5355 : // probably add an extra DbgValue to the DAG here, with a reference to
5356 : // "noreg", to indicate that we have lost the debug location for the
5357 : // variable.
5358 :
5359 26967 : if (!V->use_empty()) {
5360 : // Do not call getValue(V) yet, as we don't want to generate code.
5361 : // Remember it for later.
5362 26451 : DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
5363 26451 : return nullptr;
5364 : }
5365 :
5366 : LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
5367 : LLVM_DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
5368 : return nullptr;
5369 : }
5370 :
5371 11387 : case Intrinsic::eh_typeid_for: {
5372 : // Find the type id for the given typeinfo.
5373 11387 : GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
5374 11387 : unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
5375 22774 : Res = DAG.getConstant(TypeID, sdl, MVT::i32);
5376 11387 : setValue(&I, Res);
5377 11387 : return nullptr;
5378 : }
5379 :
5380 29 : case Intrinsic::eh_return_i32:
5381 : case Intrinsic::eh_return_i64:
5382 29 : DAG.getMachineFunction().setCallsEHReturn(true);
5383 29 : DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
5384 : MVT::Other,
5385 : getControlRoot(),
5386 : getValue(I.getArgOperand(0)),
5387 58 : getValue(I.getArgOperand(1))));
5388 29 : return nullptr;
5389 15 : case Intrinsic::eh_unwind_init:
5390 15 : DAG.getMachineFunction().setCallsUnwindInit(true);
5391 15 : return nullptr;
5392 19 : case Intrinsic::eh_dwarf_cfa:
5393 38 : setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
5394 19 : TLI.getPointerTy(DAG.getDataLayout()),
5395 19 : getValue(I.getArgOperand(0))));
5396 19 : return nullptr;
5397 175 : case Intrinsic::eh_sjlj_callsite: {
5398 175 : MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
5399 175 : ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
5400 : assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
5401 : assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
5402 :
5403 175 : MMI.setCurrentCallSite(CI->getZExtValue());
5404 175 : return nullptr;
5405 : }
5406 36 : case Intrinsic::eh_sjlj_functioncontext: {
5407 : // Get and store the index of the function context.
5408 36 : MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5409 : AllocaInst *FnCtx =
5410 36 : cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
5411 36 : int FI = FuncInfo.StaticAllocaMap[FnCtx];
5412 : MFI.setFunctionContextIndex(FI);
5413 36 : return nullptr;
5414 : }
5415 29 : case Intrinsic::eh_sjlj_setjmp: {
5416 29 : SDValue Ops[2];
5417 29 : Ops[0] = getRoot();
5418 29 : Ops[1] = getValue(I.getArgOperand(0));
5419 29 : SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
5420 29 : DAG.getVTList(MVT::i32, MVT::Other), Ops);
5421 29 : setValue(&I, Op.getValue(0));
5422 29 : DAG.setRoot(Op.getValue(1));
5423 : return nullptr;
5424 : }
5425 20 : case Intrinsic::eh_sjlj_longjmp:
5426 20 : DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
5427 20 : getRoot(), getValue(I.getArgOperand(0))));
5428 20 : return nullptr;
5429 36 : case Intrinsic::eh_sjlj_setup_dispatch:
5430 36 : DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
5431 36 : getRoot()));
5432 36 : return nullptr;
5433 318 : case Intrinsic::masked_gather:
5434 318 : visitMaskedGather(I);
5435 318 : return nullptr;
5436 302 : case Intrinsic::masked_load:
5437 302 : visitMaskedLoad(I);
5438 302 : return nullptr;
5439 97 : case Intrinsic::masked_scatter:
5440 97 : visitMaskedScatter(I);
5441 97 : return nullptr;
5442 156 : case Intrinsic::masked_store:
5443 156 : visitMaskedStore(I);
5444 156 : return nullptr;
5445 198 : case Intrinsic::masked_expandload:
5446 198 : visitMaskedLoad(I, true /* IsExpanding */);
5447 198 : return nullptr;
5448 128 : case Intrinsic::masked_compressstore:
5449 128 : visitMaskedStore(I, true /* IsCompressing */);
5450 128 : return nullptr;
5451 154 : case Intrinsic::x86_mmx_pslli_w:
5452 : case Intrinsic::x86_mmx_pslli_d:
5453 : case Intrinsic::x86_mmx_pslli_q:
5454 : case Intrinsic::x86_mmx_psrli_w:
5455 : case Intrinsic::x86_mmx_psrli_d:
5456 : case Intrinsic::x86_mmx_psrli_q:
5457 : case Intrinsic::x86_mmx_psrai_w:
5458 : case Intrinsic::x86_mmx_psrai_d: {
5459 154 : SDValue ShAmt = getValue(I.getArgOperand(1));
5460 : if (isa<ConstantSDNode>(ShAmt)) {
5461 129 : visitTargetIntrinsic(I, Intrinsic);
5462 129 : return nullptr;
5463 : }
5464 : unsigned NewIntrinsic = 0;
5465 25 : EVT ShAmtVT = MVT::v2i32;
5466 : switch (Intrinsic) {
5467 : case Intrinsic::x86_mmx_pslli_w:
5468 : NewIntrinsic = Intrinsic::x86_mmx_psll_w;
5469 : break;
5470 2 : case Intrinsic::x86_mmx_pslli_d:
5471 : NewIntrinsic = Intrinsic::x86_mmx_psll_d;
5472 2 : break;
5473 9 : case Intrinsic::x86_mmx_pslli_q:
5474 : NewIntrinsic = Intrinsic::x86_mmx_psll_q;
5475 9 : break;
5476 2 : case Intrinsic::x86_mmx_psrli_w:
5477 : NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
5478 2 : break;
5479 2 : case Intrinsic::x86_mmx_psrli_d:
5480 : NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
5481 2 : break;
5482 4 : case Intrinsic::x86_mmx_psrli_q:
5483 : NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
5484 4 : break;
5485 2 : case Intrinsic::x86_mmx_psrai_w:
5486 : NewIntrinsic = Intrinsic::x86_mmx_psra_w;
5487 2 : break;
5488 2 : case Intrinsic::x86_mmx_psrai_d:
5489 : NewIntrinsic = Intrinsic::x86_mmx_psra_d;
5490 2 : break;
5491 0 : default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
5492 : }
5493 :
5494 : // The vector shift intrinsics with scalar shift amounts use 32-bit values,
5495 : // but the SSE2/MMX shift instructions read 64 bits. Set the upper 32 bits
5496 : // to zero.
5497 : // We must do this early because v2i32 is not a legal type.
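: // Illustrative rewrite performed below (operands abbreviated):
: //   x86_mmx_pslli_w(v, s) --> x86_mmx_psll_w(v, bitcast(<s, 0>))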
5498 25 : SDValue ShOps[2];
5499 25 : ShOps[0] = ShAmt;
5500 50 : ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
5501 50 : ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps);
5502 25 : EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5503 50 : ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
5504 25 : Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
5505 25 : DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
5506 25 : getValue(I.getArgOperand(0)), ShAmt);
5507 25 : setValue(&I, Res);
5508 25 : return nullptr;
5509 : }
5510 106 : case Intrinsic::powi:
5511 212 : setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
5512 106 : getValue(I.getArgOperand(1)), DAG));
5513 106 : return nullptr;
5514 80 : case Intrinsic::log:
5515 80 : setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5516 80 : return nullptr;
5517 85 : case Intrinsic::log2:
5518 85 : setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5519 85 : return nullptr;
5520 85 : case Intrinsic::log10:
5521 85 : setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5522 85 : return nullptr;
5523 96 : case Intrinsic::exp:
5524 96 : setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5525 96 : return nullptr;
5526 102 : case Intrinsic::exp2:
5527 102 : setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
5528 102 : return nullptr;
5529 131 : case Intrinsic::pow:
5530 262 : setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
5531 131 : getValue(I.getArgOperand(1)), DAG, TLI));
5532 131 : return nullptr;
5533 7769 : case Intrinsic::sqrt:
5534 : case Intrinsic::fabs:
5535 : case Intrinsic::sin:
5536 : case Intrinsic::cos:
5537 : case Intrinsic::floor:
5538 : case Intrinsic::ceil:
5539 : case Intrinsic::trunc:
5540 : case Intrinsic::rint:
5541 : case Intrinsic::nearbyint:
5542 : case Intrinsic::round:
5543 : case Intrinsic::canonicalize: {
5544 : unsigned Opcode;
5545 : switch (Intrinsic) {
5546 0 : default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
5547 : case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
5548 2659 : case Intrinsic::fabs: Opcode = ISD::FABS; break;
5549 143 : case Intrinsic::sin: Opcode = ISD::FSIN; break;
5550 93 : case Intrinsic::cos: Opcode = ISD::FCOS; break;
5551 435 : case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
5552 2272 : case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
5553 290 : case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
5554 196 : case Intrinsic::rint: Opcode = ISD::FRINT; break;
5555 189 : case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
5556 92 : case Intrinsic::round: Opcode = ISD::FROUND; break;
5557 533 : case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
5558 : }
5559 :
5560 23307 : setValue(&I, DAG.getNode(Opcode, sdl,
5561 7769 : getValue(I.getArgOperand(0)).getValueType(),
5562 7769 : getValue(I.getArgOperand(0))));
5563 7769 : return nullptr;
5564 : }
5565 781 : case Intrinsic::minnum: {
5566 781 : auto VT = getValue(I.getArgOperand(0)).getValueType();
5567 : unsigned Opc =
5568 781 : I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)
5569 781 : ? ISD::FMINNAN
5570 : : ISD::FMINNUM;
5571 1562 : setValue(&I, DAG.getNode(Opc, sdl, VT,
5572 : getValue(I.getArgOperand(0)),
5573 : getValue(I.getArgOperand(1))));
5574 : return nullptr;
5575 : }
5576 762 : case Intrinsic::maxnum: {
5577 762 : auto VT = getValue(I.getArgOperand(0)).getValueType();
5578 : unsigned Opc =
5579 762 : I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)
5580 762 : ? ISD::FMAXNAN
5581 : : ISD::FMAXNUM;
5582 1524 : setValue(&I, DAG.getNode(Opc, sdl, VT,
5583 : getValue(I.getArgOperand(0)),
5584 : getValue(I.getArgOperand(1))));
5585 : return nullptr;
5586 : }
5587 18 : case Intrinsic::minimum:
5588 72 : setValue(&I, DAG.getNode(ISD::FMINNAN, sdl,
5589 18 : getValue(I.getArgOperand(0)).getValueType(),
5590 : getValue(I.getArgOperand(0)),
5591 18 : getValue(I.getArgOperand(1))));
5592 18 : return nullptr;
5593 18 : case Intrinsic::maximum:
5594 72 : setValue(&I, DAG.getNode(ISD::FMAXNAN, sdl,
5595 18 : getValue(I.getArgOperand(0)).getValueType(),
5596 : getValue(I.getArgOperand(0)),
5597 18 : getValue(I.getArgOperand(1))));
5598 18 : return nullptr;
5599 656 : case Intrinsic::copysign:
5600 2624 : setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
5601 656 : getValue(I.getArgOperand(0)).getValueType(),
5602 : getValue(I.getArgOperand(0)),
5603 656 : getValue(I.getArgOperand(1))));
5604 656 : return nullptr;
5605 3179 : case Intrinsic::fma:
5606 15895 : setValue(&I, DAG.getNode(ISD::FMA, sdl,
5607 3179 : getValue(I.getArgOperand(0)).getValueType(),
5608 : getValue(I.getArgOperand(0)),
5609 : getValue(I.getArgOperand(1)),
5610 3179 : getValue(I.getArgOperand(2))));
5611 3179 : return nullptr;
5612 : case Intrinsic::experimental_constrained_fadd:
5613 : case Intrinsic::experimental_constrained_fsub:
5614 : case Intrinsic::experimental_constrained_fmul:
5615 : case Intrinsic::experimental_constrained_fdiv:
5616 : case Intrinsic::experimental_constrained_frem:
5617 : case Intrinsic::experimental_constrained_fma:
5618 : case Intrinsic::experimental_constrained_sqrt:
5619 : case Intrinsic::experimental_constrained_pow:
5620 : case Intrinsic::experimental_constrained_powi:
5621 : case Intrinsic::experimental_constrained_sin:
5622 : case Intrinsic::experimental_constrained_cos:
5623 : case Intrinsic::experimental_constrained_exp:
5624 : case Intrinsic::experimental_constrained_exp2:
5625 : case Intrinsic::experimental_constrained_log:
5626 : case Intrinsic::experimental_constrained_log10:
5627 : case Intrinsic::experimental_constrained_log2:
5628 : case Intrinsic::experimental_constrained_rint:
5629 : case Intrinsic::experimental_constrained_nearbyint:
5630 226 : visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
5631 226 : return nullptr;
5632 1035 : case Intrinsic::fmuladd: {
5633 1035 : EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
5634 2070 : if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
5635 1035 : TLI.isFMAFasterThanFMulAndFAdd(VT)) {
5636 775 : setValue(&I, DAG.getNode(ISD::FMA, sdl,
5637 310 : getValue(I.getArgOperand(0)).getValueType(),
5638 : getValue(I.getArgOperand(0)),
5639 : getValue(I.getArgOperand(1)),
5640 155 : getValue(I.getArgOperand(2))));
5641 : } else {
5642 : // TODO: Intrinsic calls should have fast-math-flags.
5643 880 : SDValue Mul = DAG.getNode(ISD::FMUL, sdl,
5644 880 : getValue(I.getArgOperand(0)).getValueType(),
5645 : getValue(I.getArgOperand(0)),
5646 3520 : getValue(I.getArgOperand(1)));
5647 880 : SDValue Add = DAG.getNode(ISD::FADD, sdl,
5648 880 : getValue(I.getArgOperand(0)).getValueType(),
5649 : Mul,
5650 2640 : getValue(I.getArgOperand(2)));
5651 880 : setValue(&I, Add);
5652 : }
5653 : return nullptr;
5654 : }
5655 230 : case Intrinsic::convert_to_fp16:
5656 460 : setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
5657 : DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
5658 230 : getValue(I.getArgOperand(0)),
5659 : DAG.getTargetConstant(0, sdl,
5660 : MVT::i32))));
5661 230 : return nullptr;
5662 276 : case Intrinsic::convert_from_fp16:
5663 552 : setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
5664 276 : TLI.getValueType(DAG.getDataLayout(), I.getType()),
5665 : DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
5666 276 : getValue(I.getArgOperand(0)))));
5667 276 : return nullptr;
5668 0 : case Intrinsic::pcmarker: {
5669 0 : SDValue Tmp = getValue(I.getArgOperand(0));
5670 0 : DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
5671 : return nullptr;
5672 : }
5673 23 : case Intrinsic::readcyclecounter: {
5674 23 : SDValue Op = getRoot();
5675 23 : Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
5676 23 : DAG.getVTList(MVT::i64, MVT::Other), Op);
5677 23 : setValue(&I, Res);
5678 23 : DAG.setRoot(Res.getValue(1));
5679 : return nullptr;
5680 : }
5681 281 : case Intrinsic::bitreverse:
5682 843 : setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
5683 281 : getValue(I.getArgOperand(0)).getValueType(),
5684 281 : getValue(I.getArgOperand(0))));
5685 281 : return nullptr;
5686 634 : case Intrinsic::bswap:
5687 1902 : setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
5688 634 : getValue(I.getArgOperand(0)).getValueType(),
5689 634 : getValue(I.getArgOperand(0))));
5690 634 : return nullptr;
5691 775 : case Intrinsic::cttz: {
5692 775 : SDValue Arg = getValue(I.getArgOperand(0));
5693 : ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
5694 775 : EVT Ty = Arg.getValueType();
5695 1994 : setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
5696 : sdl, Ty, Arg));
5697 : return nullptr;
5698 : }
5699 1880 : case Intrinsic::ctlz: {
5700 1880 : SDValue Arg = getValue(I.getArgOperand(0));
5701 : ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
5702 1880 : EVT Ty = Arg.getValueType();
5703 5200 : setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
5704 : sdl, Ty, Arg));
5705 : return nullptr;
5706 : }
5707 707 : case Intrinsic::ctpop: {
5708 707 : SDValue Arg = getValue(I.getArgOperand(0));
5709 707 : EVT Ty = Arg.getValueType();
5710 1414 : setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
5711 : return nullptr;
5712 : }
5713 138 : case Intrinsic::fshl:
5714 : case Intrinsic::fshr: {
5715 : bool IsFSHL = Intrinsic == Intrinsic::fshl;
5716 138 : SDValue X = getValue(I.getArgOperand(0));
5717 138 : SDValue Y = getValue(I.getArgOperand(1));
5718 138 : SDValue Z = getValue(I.getArgOperand(2));
5719 138 : EVT VT = X.getValueType();
5720 138 : SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT);
5721 138 : SDValue Zero = DAG.getConstant(0, sdl, VT);
5722 276 : SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC);
5723 :
5724 : // When X == Y, this is a rotate. If the data type has a power-of-2 size, we
5725 : // avoid the select that is necessary in the general case to filter out
5726 : // the 0-shift possibility that leads to UB.
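: // Illustrative example (i8): fshl(x, x, 3) == rotl(x, 3); because rotate
: // amounts are taken modulo the bitwidth for power-of-2 types, an amount of
: // 0 (or any multiple of 8) just returns x, so no UB-filtering select is
: // needed on this path.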
5727 138 : if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) {
5728 : // TODO: This should also be done if the operation is custom, but we have
5729 : // to make sure targets are handling the modulo shift amount as expected.
5730 66 : auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
5731 : if (TLI.isOperationLegal(RotateOpcode, VT)) {
5732 50 : setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
5733 25 : return nullptr;
5734 : }
5735 :
5736 : // Some targets only rotate one way. Try the opposite direction.
5737 41 : RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL;
5738 : if (TLI.isOperationLegal(RotateOpcode, VT)) {
5739 : // Negate the shift amount because it is safe to ignore the high bits.
5740 14 : SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
5741 14 : setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
5742 : return nullptr;
5743 : }
5744 :
5745 : // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
5746 : // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW))
5747 68 : SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
5748 68 : SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC);
5749 85 : SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
5750 85 : SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
5751 68 : setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
5752 34 : return nullptr;
5753 : }
5754 :
5755 : // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
5756 : // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
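: // e.g. (illustrative, i8, Z == 3): fshl(x, y, 3) = (x << 3) | (y >> 5).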
5757 144 : SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt);
5758 180 : SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt);
5759 180 : SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt);
5760 144 : SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY);
5761 :
5762 : // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
5763 : // and that is undefined. We must compare and select to avoid UB.
5764 72 : EVT CCVT = MVT::i1;
5765 72 : if (VT.isVector())
5766 6 : CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements());
5767 :
5768 : // For fshl, 0-shift returns the 1st arg (X).
5769 : // For fshr, 0-shift returns the 2nd arg (Y).
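: // e.g. (illustrative, i8, Z == 8): ShAmt == 0 and InvShAmt == 8; an 8-bit
: // shift by 8 is undefined, so the select yields X (fshl) or Y (fshr).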
5770 72 : SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
5771 108 : setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
5772 72 : return nullptr;
5773 : }
5774 8 : case Intrinsic::sadd_sat: {
5775 8 : SDValue Op1 = getValue(I.getArgOperand(0));
5776 8 : SDValue Op2 = getValue(I.getArgOperand(1));
5777 24 : setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
5778 : return nullptr;
5779 : }
5780 166 : case Intrinsic::stacksave: {
5781 166 : SDValue Op = getRoot();
5782 166 : Res = DAG.getNode(
5783 : ISD::STACKSAVE, sdl,
5784 332 : DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
5785 166 : setValue(&I, Res);
5786 166 : DAG.setRoot(Res.getValue(1));
5787 : return nullptr;
5788 : }
5789 75 : case Intrinsic::stackrestore:
5790 75 : Res = getValue(I.getArgOperand(0));
5791 150 : DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
5792 75 : return nullptr;
5793 4 : case Intrinsic::get_dynamic_area_offset: {
5794 4 : SDValue Op = getRoot();
5795 4 : EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
5796 4 : EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
5797 : // Result type for @llvm.get.dynamic.area.offset should match PtrTy for
5798 : // the target.
5799 4 : if (PtrTy != ResTy)
5800 0 : report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
5801 : " intrinsic!");
5802 8 : Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
5803 4 : Op);
5804 4 : DAG.setRoot(Op);
5805 4 : setValue(&I, Res);
5806 : return nullptr;
5807 : }
5808 354 : case Intrinsic::stackguard: {
5809 354 : EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
5810 354 : MachineFunction &MF = DAG.getMachineFunction();
5811 354 : const Module &M = *MF.getFunction().getParent();
5812 354 : SDValue Chain = getRoot();
5813 354 : if (TLI.useLoadStackGuardNode()) {
5814 156 : Res = getLoadStackGuard(DAG, sdl, Chain);
5815 : } else {
5816 198 : const Value *Global = TLI.getSDagStackGuard(M);
5817 198 : unsigned Align = DL->getPrefTypeAlignment(Global->getType());
5818 198 : Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
5819 : MachinePointerInfo(Global, 0), Align,
5820 198 : MachineMemOperand::MOVolatile);
5821 : }
5822 354 : if (TLI.useStackGuardXorFP())
5823 137 : Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
5824 354 : DAG.setRoot(Chain);
5825 354 : setValue(&I, Res);
5826 : return nullptr;
5827 : }
5828 1237 : case Intrinsic::stackprotector: {
5829 : // Emit code into the DAG to store the stack guard onto the stack.
5830 1237 : MachineFunction &MF = DAG.getMachineFunction();
5831 1237 : MachineFrameInfo &MFI = MF.getFrameInfo();
5832 : EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
5833 1237 : SDValue Src, Chain = getRoot();
5834 :
5835 1237 : if (TLI.useLoadStackGuardNode())
5836 142 : Src = getLoadStackGuard(DAG, sdl, Chain);
5837 : else
5838 1095 : Src = getValue(I.getArgOperand(0)); // The guard's value.
5839 :
5840 1237 : AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
5841 :
5842 1237 : int FI = FuncInfo.StaticAllocaMap[Slot];
5843 : MFI.setStackProtectorIndex(FI);
5844 :
5845 1237 : SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
5846 :
5847 : // Store the stack protector onto the stack.
5848 1237 : Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(
5849 : DAG.getMachineFunction(), FI),
5850 1237 : /* Alignment = */ 0, MachineMemOperand::MOVolatile);
5851 1237 : setValue(&I, Res);
5852 1237 : DAG.setRoot(Res);
5853 : return nullptr;
5854 : }
5855 0 : case Intrinsic::objectsize: {
5856 : // If we don't know by now, we're never going to know.
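: // (Illustrative: the i1 'min' argument selects the fallback below:
: // false gives -1, the unknown maximum; true gives 0, the unknown minimum.)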
5857 0 : ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
5858 :
5859 : assert(CI && "Non-constant type in __builtin_object_size?");
5860 :
5861 0 : SDValue Arg = getValue(I.getCalledValue());
5862 0 : EVT Ty = Arg.getValueType();
5863 :
5864 0 : if (CI->isZero())
5865 0 : Res = DAG.getConstant(-1ULL, sdl, Ty);
5866 : else
5867 0 : Res = DAG.getConstant(0, sdl, Ty);
5868 :
5869 0 : setValue(&I, Res);
5870 : return nullptr;
5871 : }
5872 4 : case Intrinsic::annotation:
5873 : case Intrinsic::ptr_annotation:
5874 : case Intrinsic::launder_invariant_group:
5875 : case Intrinsic::strip_invariant_group:
5876 : // Drop the intrinsic, but forward the value
5877 4 : setValue(&I, getValue(I.getOperand(0)));
5878 4 : return nullptr;
5879 : case Intrinsic::assume:
5880 : case Intrinsic::var_annotation:
5881 : case Intrinsic::sideeffect:
5882 : // Discard annotate attributes, assumptions, and artificial side-effects.
5883 : return nullptr;
5884 :
5885 1 : case Intrinsic::codeview_annotation: {
5886 : // Emit a label associated with this metadata.
5887 1 : MachineFunction &MF = DAG.getMachineFunction();
5888 : MCSymbol *Label =
5889 2 : MF.getMMI().getContext().createTempSymbol("annotation", true);
5890 1 : Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
5891 : MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
5892 1 : Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
5893 1 : DAG.setRoot(Res);
5894 1 : return nullptr;
5895 : }
5896 :
5897 4 : case Intrinsic::init_trampoline: {
5898 4 : const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
5899 :
5900 4 : SDValue Ops[6];
5901 4 : Ops[0] = getRoot();
5902 4 : Ops[1] = getValue(I.getArgOperand(0));
5903 4 : Ops[2] = getValue(I.getArgOperand(1));
5904 4 : Ops[3] = getValue(I.getArgOperand(2));
5905 4 : Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
5906 4 : Ops[5] = DAG.getSrcValue(F);
5907 :
5908 8 : Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
5909 :
5910 4 : DAG.setRoot(Res);
5911 : return nullptr;
5912 : }
5913 4 : case Intrinsic::adjust_trampoline:
5914 8 : setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
5915 4 : TLI.getPointerTy(DAG.getDataLayout()),
5916 4 : getValue(I.getArgOperand(0))));
5917 4 : return nullptr;
5918 2 : case Intrinsic::gcroot: {
5919 : assert(DAG.getMachineFunction().getFunction().hasGC() &&
5920 : "only valid in functions with gc specified, enforced by Verifier");
5921 : assert(GFI && "implied by previous");
5922 2 : const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
5923 : const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
5924 :
5925 2 : FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
5926 2 : GFI->addStackRoot(FI->getIndex(), TypeMap);
5927 2 : return nullptr;
5928 : }
5929 : case Intrinsic::gcread:
5930 : case Intrinsic::gcwrite:
5931 : llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
5932 7 : case Intrinsic::flt_rounds:
5933 14 : setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
5934 7 : return nullptr;
5935 :
5936 11 : case Intrinsic::expect:
5937 : // Just replace __builtin_expect(exp, c) with EXP.
5938 11 : setValue(&I, getValue(I.getArgOperand(0)));
5939 11 : return nullptr;
5940 :
5941 277 : case Intrinsic::debugtrap:
5942 : case Intrinsic::trap: {
5943 : StringRef TrapFuncName =
5944 277 : I.getAttributes()
5945 554 : .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
5946 277 : .getValueAsString();
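: // Illustrative example (hypothetical attribute value): with
: // "trap-func-name"="my_trap", llvm.trap lowers to a call to my_trap()
: // below instead of an ISD::TRAP node.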
5947 277 : if (TrapFuncName.empty()) {
5948 268 : ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
5949 : ISD::TRAP : ISD::DEBUGTRAP;
5950 536 : DAG.setRoot(DAG.getNode(Op, sdl, MVT::Other, getRoot()));
5951 268 : return nullptr;
5952 : }
5953 : TargetLowering::ArgListTy Args;
5954 :
5955 18 : TargetLowering::CallLoweringInfo CLI(DAG);
5956 9 : CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
5957 : CallingConv::C, I.getType(),
5958 9 : DAG.getExternalSymbol(TrapFuncName.data(),
5959 : TLI.getPointerTy(DAG.getDataLayout())),
5960 18 : std::move(Args));
5961 :
5962 9 : std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
5963 9 : DAG.setRoot(Result.second);
5964 : return nullptr;
5965 : }
5966 :
5967 1335 : case Intrinsic::uadd_with_overflow:
5968 : case Intrinsic::sadd_with_overflow:
5969 : case Intrinsic::usub_with_overflow:
5970 : case Intrinsic::ssub_with_overflow:
5971 : case Intrinsic::umul_with_overflow:
5972 : case Intrinsic::smul_with_overflow: {
5973 : ISD::NodeType Op;
5974 : switch (Intrinsic) {
5975 0 : default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
5976 : case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
5977 338 : case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
5978 244 : case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
5979 285 : case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
5980 83 : case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
5981 55 : case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
5982 : }
5983 1335 : SDValue Op1 = getValue(I.getArgOperand(0));
5984 1335 : SDValue Op2 = getValue(I.getArgOperand(1));
5985 :
5986 4005 : SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
5987 1335 : setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
5988 : return nullptr;
5989 : }
5990 208 : case Intrinsic::prefetch: {
5991 208 : SDValue Ops[5];
5992 208 : unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
5993 208 : auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
5994 208 : Ops[0] = DAG.getRoot();
5995 208 : Ops[1] = getValue(I.getArgOperand(0));
5996 208 : Ops[2] = getValue(I.getArgOperand(1));
5997 208 : Ops[3] = getValue(I.getArgOperand(2));
5998 208 : Ops[4] = getValue(I.getArgOperand(3));
5999 208 : SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
6000 208 : DAG.getVTList(MVT::Other), Ops,
6001 208 : EVT::getIntegerVT(*Context, 8),
6002 : MachinePointerInfo(I.getArgOperand(0)),
6003 : 0, /* align */
6004 416 : Flags);
6005 :
6006 : // Chain the prefetch in parallel with any pending loads, to stay out of
6007 : // the way of later optimizations.
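: // (Illustrative: queuing Result on PendingLoads lets the next getRoot()
: // fold the prefetch into a TokenFactor with other pending loads rather
: // than serializing it on the chain.)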
6008 208 : PendingLoads.push_back(Result);
6009 208 : Result = getRoot();
6010 208 : DAG.setRoot(Result);
6011 : return nullptr;
6012 : }
6013 56941 : case Intrinsic::lifetime_start:
6014 : case Intrinsic::lifetime_end: {
6015 : bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
6016 : // Stack coloring is not enabled at -O0; discard region information.
6017 56941 : if (TM.getOptLevel() == CodeGenOpt::None)
6018 : return nullptr;
6019 :
6020 : SmallVector<Value *, 4> Allocas;
6021 56938 : GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
6022 :
6023 56942 : for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
6024 113880 : E = Allocas.end(); Object != E; ++Object) {
6025 56944 : AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
6026 :
6027 : // Could not find an Alloca.
6028 : if (!LifetimeObject)
6029 2 : continue;
6030 :
6031 : // First check that the Alloca is static, otherwise it won't have a
6032 : // valid frame index.
6033 56942 : auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
6034 113884 : if (SI == FuncInfo.StaticAllocaMap.end())
6035 2 : return nullptr;
6036 :
6037 56940 : int FI = SI->second;
6038 :
6039 56940 : SDValue Ops[2];
6040 56940 : Ops[0] = getRoot();
6041 56940 : Ops[1] =
6042 56940 : DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true);
6043 56940 : unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
6044 :
6045 113880 : Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
6046 56940 : DAG.setRoot(Res);
6047 : }
6048 : return nullptr;
6049 : }
6050 336 : case Intrinsic::invariant_start:
6051 : // Discard region information.
6052 672 : setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
6053 336 : return nullptr;
6054 : case Intrinsic::invariant_end:
6055 : // Discard region information.
6056 : return nullptr;
6057 3 : case Intrinsic::clear_cache:
6058 3 : return TLI.getClearCacheBuiltinName();
6059 : case Intrinsic::donothing:
6060 : // ignore
6061 : return nullptr;
6062 140 : case Intrinsic::experimental_stackmap:
6063 140 : visitStackmap(I);
6064 140 : return nullptr;
6065 : case Intrinsic::experimental_patchpoint_void:
6066 : case Intrinsic::experimental_patchpoint_i64:
6067 144 : visitPatchpoint(&I);
6068 144 : return nullptr;
6069 59 : case Intrinsic::experimental_gc_statepoint:
6070 59 : LowerStatepoint(ImmutableStatepoint(&I));
6071 59 : return nullptr;
6072 : case Intrinsic::experimental_gc_result:
6073 24 : visitGCResult(cast<GCResultInst>(I));
6074 24 : return nullptr;
6075 : case Intrinsic::experimental_gc_relocate:
6076 66 : visitGCRelocate(cast<GCRelocateInst>(I));
6077 66 : return nullptr;
6078 : case Intrinsic::instrprof_increment:
6079 : llvm_unreachable("instrprof failed to lower an increment");
6080 : case Intrinsic::instrprof_value_profile:
6081 : llvm_unreachable("instrprof failed to lower a value profiling call");
6082 11 : case Intrinsic::localescape: {
6083 11 : MachineFunction &MF = DAG.getMachineFunction();
6084 11 : const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
6085 :
6086 : // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
6087 : // is the same on all targets.
6088 39 : for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
6089 : Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
6090 17 : if (isa<ConstantPointerNull>(Arg))
6091 : continue; // Skip null pointers. They represent a hole in index space.
6092 : AllocaInst *Slot = cast<AllocaInst>(Arg);
6093 : assert(FuncInfo.StaticAllocaMap.count(Slot) &&
6094 : "can only escape static allocas");
6095 17 : int FI = FuncInfo.StaticAllocaMap[Slot];
6096 : MCSymbol *FrameAllocSym =
6097 34 : MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
6098 : GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
6099 17 : BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
6100 34 : TII->get(TargetOpcode::LOCAL_ESCAPE))
6101 : .addSym(FrameAllocSym)
6102 : .addFrameIndex(FI);
6103 : }
6104 :
6105 : return nullptr;
6106 : }
6107 :
6108 13 : case Intrinsic::localrecover: {
6109 : // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
6110 13 : MachineFunction &MF = DAG.getMachineFunction();
6111 : MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0);
6112 :
6113 : // Get the symbol that defines the frame offset.
6114 13 : auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
6115 : auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
6116 : unsigned IdxVal =
6117 13 : unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
6118 : MCSymbol *FrameAllocSym =
6119 26 : MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
6120 : GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
6121 :
6122 : // Create an MCSymbol for the label to avoid any target lowering
6123 : // that would make this PC relative.
6124 26 : SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
6125 : SDValue OffsetVal =
6126 26 : DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
6127 :
6128 : // Add the offset to the FP.
6129 : Value *FP = I.getArgOperand(1);
6130 13 : SDValue FPVal = getValue(FP);
6131 26 : SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
6132 13 : setValue(&I, Add);
6133 :
6134 : return nullptr;
6135 : }
6136 :
6137 6 : case Intrinsic::eh_exceptionpointer:
6138 : case Intrinsic::eh_exceptioncode: {
6139 : // Get the exception pointer vreg, copy from it, and resize it to fit.
6140 6 : const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
6141 6 : MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
6142 6 : const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
6143 6 : unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
6144 : SDValue N =
6145 24 : DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
6146 6 : if (Intrinsic == Intrinsic::eh_exceptioncode)
6147 9 : N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
6148 6 : setValue(&I, N);
6149 : return nullptr;
6150 : }
6151 2 : case Intrinsic::xray_customevent: {
6152 : // Here we want to make sure that the intrinsic behaves as if it has a
6153 : // specific calling convention; for now this is only done for x86_64.
6154 : // FIXME: Support other platforms later.
6155 2 : const auto &Triple = DAG.getTarget().getTargetTriple();
6156 2 : if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
6157 : return nullptr;
6158 :
6159 2 : SDLoc DL = getCurSDLoc();
6160 : SmallVector<SDValue, 8> Ops;
6161 :
6162 : // We want to say that we always want the arguments in registers.
6163 2 : SDValue LogEntryVal = getValue(I.getArgOperand(0));
6164 2 : SDValue StrSizeVal = getValue(I.getArgOperand(1));
6165 4 : SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6166 2 : SDValue Chain = getRoot();
6167 2 : Ops.push_back(LogEntryVal);
6168 2 : Ops.push_back(StrSizeVal);
6169 2 : Ops.push_back(Chain);
6170 :
6171 : // We need to enforce the calling convention for the callsite so that
6172 : // argument ordering is enforced correctly and register allocation can
6173 : // see that some registers may be clobbered and must be preserved across
6174 : // calls to the intrinsic.
6175 4 : MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
6176 : DL, NodeTys, Ops);
6177 : SDValue patchableNode = SDValue(MN, 0);
6178 2 : DAG.setRoot(patchableNode);
6179 2 : setValue(&I, patchableNode);
6180 : return nullptr;
6181 : }
6182 2 : case Intrinsic::xray_typedevent: {
6183 : // Here we want to make sure that the intrinsic behaves as if it has a
6184 : // specific calling convention; for now this is only done for x86_64.
6185 : // FIXME: Support other platforms later.
6186 2 : const auto &Triple = DAG.getTarget().getTargetTriple();
6187 2 : if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
6188 : return nullptr;
6189 :
6190 2 : SDLoc DL = getCurSDLoc();
6191 : SmallVector<SDValue, 8> Ops;
6192 :
6193 : // We want to say that we always want the arguments in registers.
6194 : // It's unclear to me how manipulating the selection DAG here forces callers
6195 : // to provide arguments in registers instead of on the stack.
6196 2 : SDValue LogTypeId = getValue(I.getArgOperand(0));
6197 2 : SDValue LogEntryVal = getValue(I.getArgOperand(1));
6198 2 : SDValue StrSizeVal = getValue(I.getArgOperand(2));
6199 4 : SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6200 2 : SDValue Chain = getRoot();
6201 2 : Ops.push_back(LogTypeId);
6202 2 : Ops.push_back(LogEntryVal);
6203 2 : Ops.push_back(StrSizeVal);
6204 2 : Ops.push_back(Chain);
6205 :
6206 : // We need to enforce the calling convention for the callsite so that
6207 : // argument ordering is enforced correctly and register allocation can
6208 : // see that some registers may be clobbered and must be preserved across
6209 : // calls to the intrinsic.
6210 4 : MachineSDNode *MN = DAG.getMachineNode(
6211 : TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
6212 : SDValue patchableNode = SDValue(MN, 0);
6213 2 : DAG.setRoot(patchableNode);
6214 2 : setValue(&I, patchableNode);
6215 : return nullptr;
6216 : }
6217 0 : case Intrinsic::experimental_deoptimize:
6218 0 : LowerDeoptimizeCall(&I);
6219 0 : return nullptr;
6220 :
6221 58 : case Intrinsic::experimental_vector_reduce_fadd:
6222 : case Intrinsic::experimental_vector_reduce_fmul:
6223 : case Intrinsic::experimental_vector_reduce_add:
6224 : case Intrinsic::experimental_vector_reduce_mul:
6225 : case Intrinsic::experimental_vector_reduce_and:
6226 : case Intrinsic::experimental_vector_reduce_or:
6227 : case Intrinsic::experimental_vector_reduce_xor:
6228 : case Intrinsic::experimental_vector_reduce_smax:
6229 : case Intrinsic::experimental_vector_reduce_smin:
6230 : case Intrinsic::experimental_vector_reduce_umax:
6231 : case Intrinsic::experimental_vector_reduce_umin:
6232 : case Intrinsic::experimental_vector_reduce_fmax:
6233 : case Intrinsic::experimental_vector_reduce_fmin:
6234 58 : visitVectorReduce(I, Intrinsic);
6235 58 : return nullptr;
6236 :
6237 : case Intrinsic::icall_branch_funnel: {
6238 : SmallVector<SDValue, 16> Ops;
6239 22 : Ops.push_back(DAG.getRoot());
6240 11 : Ops.push_back(getValue(I.getArgOperand(0)));
6241 :
6242 : int64_t Offset;
6243 11 : auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
6244 11 : I.getArgOperand(1), Offset, DAG.getDataLayout()));
6245 : if (!Base)
6246 0 : report_fatal_error(
6247 : "llvm.icall.branch.funnel operand must be a GlobalValue");
6248 22 : Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
6249 :
6250 : struct BranchFunnelTarget {
6251 : int64_t Offset;
6252 : SDValue Target;
6253 : };
6254 11 : SmallVector<BranchFunnelTarget, 8> Targets;
6255 :
6256 47 : for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
6257 36 : auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
6258 36 : I.getArgOperand(Op), Offset, DAG.getDataLayout()));
6259 36 : if (ElemBase != Base)
6260 0 : report_fatal_error("all llvm.icall.branch.funnel operands must refer "
6261 : "to the same GlobalValue");
6262 :
6263 36 : SDValue Val = getValue(I.getArgOperand(Op + 1));
6264 : auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
6265 : if (!GA)
6266 0 : report_fatal_error(
6267 : "llvm.icall.branch.funnel operand must be a GlobalValue");
6268 36 : Targets.push_back({Offset, DAG.getTargetGlobalAddress(
6269 72 : GA->getGlobal(), getCurSDLoc(),
6270 72 : Val.getValueType(), GA->getOffset())});
6271 : }
6272 11 : llvm::sort(Targets,
6273 : [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
6274 0 : return T1.Offset < T2.Offset;
6275 : });
6276 :
6277 47 : for (auto &T : Targets) {
6278 72 : Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
6279 36 : Ops.push_back(T.Target);
6280 : }
6281 :
6282 22 : SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
6283 11 : getCurSDLoc(), MVT::Other, Ops),
6284 : 0);
6285 11 : DAG.setRoot(N);
6286 11 : setValue(&I, N);
6287 11 : HasTailCall = true;
6288 : return nullptr;
6289 : }
6290 :
6291 : case Intrinsic::wasm_landingpad_index: {
6292 : // TODO: Store the landing pad index in a map, to be used when generating
6293 : // LSDA information.
6294 : return nullptr;
6295 : }
6296 : }
6297 : }
6298 :
6299 226 : void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
6300 : const ConstrainedFPIntrinsic &FPI) {
6301 226 : SDLoc sdl = getCurSDLoc();
6302 : unsigned Opcode;
6303 : switch (FPI.getIntrinsicID()) {
6304 0 : default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
6305 : case Intrinsic::experimental_constrained_fadd:
6306 : Opcode = ISD::STRICT_FADD;
6307 : break;
6308 : case Intrinsic::experimental_constrained_fsub:
6309 : Opcode = ISD::STRICT_FSUB;
6310 : break;
6311 : case Intrinsic::experimental_constrained_fmul:
6312 : Opcode = ISD::STRICT_FMUL;
6313 : break;
6314 : case Intrinsic::experimental_constrained_fdiv:
6315 : Opcode = ISD::STRICT_FDIV;
6316 : break;
6317 : case Intrinsic::experimental_constrained_frem:
6318 : Opcode = ISD::STRICT_FREM;
6319 : break;
6320 : case Intrinsic::experimental_constrained_fma:
6321 : Opcode = ISD::STRICT_FMA;
6322 : break;
6323 : case Intrinsic::experimental_constrained_sqrt:
6324 : Opcode = ISD::STRICT_FSQRT;
6325 : break;
6326 : case Intrinsic::experimental_constrained_pow:
6327 : Opcode = ISD::STRICT_FPOW;
6328 : break;
6329 : case Intrinsic::experimental_constrained_powi:
6330 : Opcode = ISD::STRICT_FPOWI;
6331 : break;
6332 : case Intrinsic::experimental_constrained_sin:
6333 : Opcode = ISD::STRICT_FSIN;
6334 : break;
6335 : case Intrinsic::experimental_constrained_cos:
6336 : Opcode = ISD::STRICT_FCOS;
6337 : break;
6338 : case Intrinsic::experimental_constrained_exp:
6339 : Opcode = ISD::STRICT_FEXP;
6340 : break;
6341 : case Intrinsic::experimental_constrained_exp2:
6342 : Opcode = ISD::STRICT_FEXP2;
6343 : break;
6344 : case Intrinsic::experimental_constrained_log:
6345 : Opcode = ISD::STRICT_FLOG;
6346 : break;
6347 : case Intrinsic::experimental_constrained_log10:
6348 : Opcode = ISD::STRICT_FLOG10;
6349 : break;
6350 : case Intrinsic::experimental_constrained_log2:
6351 : Opcode = ISD::STRICT_FLOG2;
6352 : break;
6353 : case Intrinsic::experimental_constrained_rint:
6354 : Opcode = ISD::STRICT_FRINT;
6355 : break;
6356 : case Intrinsic::experimental_constrained_nearbyint:
6357 : Opcode = ISD::STRICT_FNEARBYINT;
6358 : break;
6359 : }
6360 226 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6361 226 : SDValue Chain = getRoot();
6362 : SmallVector<EVT, 4> ValueVTs;
6363 226 : ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
6364 226 : ValueVTs.push_back(MVT::Other); // Out chain
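: // (Illustrative: each STRICT_* node produces (result, chain); threading the
: // out chain back into the root keeps these ops ordered with anything that
: // might read or modify the floating-point environment.)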
6365 :
6366 452 : SDVTList VTs = DAG.getVTList(ValueVTs);
6367 : SDValue Result;
6368 226 : if (FPI.isUnaryOp())
6369 240 : Result = DAG.getNode(Opcode, sdl, VTs,
6370 120 : { Chain, getValue(FPI.getArgOperand(0)) });
6371 106 : else if (FPI.isTernaryOp())
6372 36 : Result = DAG.getNode(Opcode, sdl, VTs,
6373 18 : { Chain, getValue(FPI.getArgOperand(0)),
6374 18 : getValue(FPI.getArgOperand(1)),
6375 36 : getValue(FPI.getArgOperand(2)) });
6376 : else
6377 176 : Result = DAG.getNode(Opcode, sdl, VTs,
6378 88 : { Chain, getValue(FPI.getArgOperand(0)),
6379 176 : getValue(FPI.getArgOperand(1)) });
6380 :
6381 : assert(Result.getNode()->getNumValues() == 2);
6382 226 : SDValue OutChain = Result.getValue(1);
6383 226 : DAG.setRoot(OutChain);
6384 : SDValue FPResult = Result.getValue(0);
6385 226 : setValue(&FPI, FPResult);
6386 226 : }
6387 :
6388 : std::pair<SDValue, SDValue>
6389 990458 : SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
6390 : const BasicBlock *EHPadBB) {
6391 990458 : MachineFunction &MF = DAG.getMachineFunction();
6392 990458 : MachineModuleInfo &MMI = MF.getMMI();
6393 : MCSymbol *BeginLabel = nullptr;
6394 :
6395 990458 : if (EHPadBB) {
6396 : // Insert a label before the invoke call to mark the try range. This can be
6397 : // used to detect deletion of the invoke via the MachineModuleInfo.
6398 496989 : BeginLabel = MMI.getContext().createTempSymbol();
6399 :
6400 : // For SjLj, keep track of which landing pads go with which invokes
6401 : // so as to maintain the ordering of pads in the LSDA.
6402 496989 : unsigned CallSiteIndex = MMI.getCurrentCallSite();
6403 496989 : if (CallSiteIndex) {
6404 175 : MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
6405 175 : LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
6406 :
6407 : // Now that the call site is handled, stop tracking it.
6408 : MMI.setCurrentCallSite(0);
6409 : }
6410 :
6411 : // Both PendingLoads and PendingExports must be flushed here;
6412 : // this call might not return.
6413 496989 : (void)getRoot();
6414 993978 : DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel));
6415 :
6416 496989 : CLI.setChain(getRoot());
6417 : }
6418 990458 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6419 990458 : std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
6420 :
6421 : assert((CLI.IsTailCall || Result.second.getNode()) &&
6422 : "Non-null chain expected with non-tail call!");
6423 : assert((Result.second.getNode() || !Result.first.getNode()) &&
6424 : "Null value expected with tail call!");
6425 :
6426 990452 : if (!Result.second.getNode()) {
6427 : // As a special case, a null chain means that a tail call has been emitted
6428 : // and the DAG root is already updated.
6429 5141 : HasTailCall = true;
6430 :
6431 : // Since there's no actual continuation from this block, nothing can
6432 : // rely on us setting vregs for them.
6433 : PendingExports.clear();
6434 : } else {
6435 985311 : DAG.setRoot(Result.second);
6436 : }
6437 :
6438 990452 : if (EHPadBB) {
6439 : // Insert a label at the end of the invoke call to mark the try range. This
6440 : // can be used to detect deletion of the invoke via the MachineModuleInfo.
6441 496989 : MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
6442 993978 : DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
6443 :
6444 : // Inform MachineModuleInfo of range.
6445 496989 : auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
6446 : // Some platforms (e.g. wasm) use funclet-style IR but do not actually use
6447 : // outlined funclets or their LSDA info style.
6448 496989 : if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
6449 : assert(CLI.CS);
6450 146 : WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
6451 146 : EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
6452 : BeginLabel, EndLabel);
6453 : } else {
6454 496843 : MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
6455 : }
6456 : }
6457 :
6458 990452 : return Result;
6459 : }
6460 :
6461 990242 : void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
6462 : bool isTailCall,
6463 : const BasicBlock *EHPadBB) {
6464 990242 : auto &DL = DAG.getDataLayout();
6465 : FunctionType *FTy = CS.getFunctionType();
6466 : Type *RetTy = CS.getType();
6467 :
6468 : TargetLowering::ArgListTy Args;
6469 990242 : Args.reserve(CS.arg_size());
6470 :
6471 : const Value *SwiftErrorVal = nullptr;
6472 990242 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6473 :
6474 : // We can't tail call inside a function with a swifterror argument. Lowering
6475 : // does not support this yet. It would have to move into the swifterror
6476 : // register before the call.
6477 990242 : auto *Caller = CS.getInstruction()->getParent()->getParent();
6478 1927622 : if (TLI.supportSwiftError() &&
6479 1927622 : Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
6480 : isTailCall = false;
6481 :
6482 3129234 : for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
6483 3129234 : i != e; ++i) {
6484 : TargetLowering::ArgListEntry Entry;
6485 2138992 : const Value *V = *i;
6486 :
6487 : // Skip empty types
6488 2138992 : if (V->getType()->isEmptyTy())
6489 6 : continue;
6490 :
6491 2138986 : SDValue ArgNode = getValue(V);
6492 2138986 : Entry.Node = ArgNode; Entry.Ty = V->getType();
6493 :
6494 2138986 : Entry.setAttributes(&CS, i - CS.arg_begin());
6495 :
6496 : // Use swifterror virtual register as input to the call.
6497 2138986 : if (Entry.IsSwiftError && TLI.supportSwiftError()) {
6498 : SwiftErrorVal = V;
6499 : // We find the virtual register for the actual swifterror argument.
6500 : // Instead of using the Value, we use the virtual register instead.
6501 112 : Entry.Node = DAG.getRegister(FuncInfo
6502 : .getOrCreateSwiftErrorVRegUseAt(
6503 224 : CS.getInstruction(), FuncInfo.MBB, V)
6504 : .first,
6505 112 : EVT(TLI.getPointerTy(DL)));
6506 : }
6507 :
6508 2138986 : Args.push_back(Entry);
6509 :
6510 : // If we have an explicit sret argument that is an Instruction (i.e., it
6511 : // might point to function-local memory), we can't meaningfully tail-call.
6512 2138986 : if (Entry.IsSRet && isa<Instruction>(V))
6513 : isTailCall = false;
6514 : }
6515 :
6516 : // Check if target-independent constraints permit a tail call here.
6517 : // Target-dependent constraints are checked within TLI->LowerCallTo.
6518 990242 : if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
6519 : isTailCall = false;
6520 :
6521 : // Disable tail calls if there is a swifterror argument. Targets have not
6522 : // been updated to support tail calls.
6523 990242 : if (TLI.supportSwiftError() && SwiftErrorVal)
6524 : isTailCall = false;
6525 :
6526 1980478 : TargetLowering::CallLoweringInfo CLI(DAG);
6527 990242 : CLI.setDebugLoc(getCurSDLoc())
6528 990242 : .setChain(getRoot())
6529 990242 : .setCallee(RetTy, FTy, Callee, std::move(Args), CS)
6530 : .setTailCall(isTailCall)
6531 990242 : .setConvergent(CS.isConvergent());
6532 990242 : std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
6533 :
6534 990236 : if (Result.first.getNode()) {
6535 : const Instruction *Inst = CS.getInstruction();
6536 389037 : Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
6537 389037 : setValue(Inst, Result.first);
6538 : }
6539 :
6540 : // The last element of CLI.InVals has the SDValue for swifterror return.
6541 : // Here we copy it to a virtual register and update SwiftErrorMap for
6542 : // book-keeping.
6543 990236 : if (SwiftErrorVal && TLI.supportSwiftError()) {
6544 : // Get the last element of InVals.
6545 112 : SDValue Src = CLI.InVals.back();
6546 : unsigned VReg; bool CreatedVReg;
6547 : std::tie(VReg, CreatedVReg) =
6548 224 : FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction());
6549 112 : SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
6550 : // We update the virtual register for the actual swifterror argument.
6551 112 : if (CreatedVReg)
6552 69 : FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
6553 112 : DAG.setRoot(CopyNode);
6554 : }
6555 990236 : }
6556 :
6557 134 : static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
6558 : SelectionDAGBuilder &Builder) {
6559 : // Check to see if this load can be trivially constant folded, e.g. if the
6560 : // input is from a string literal.
6561 : if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
6562 : // Cast pointer to the type we really want to load.
6563 : Type *LoadTy =
6564 14 : Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
6565 28 : if (LoadVT.isVector())
6566 4 : LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
6567 :
6568 14 : LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
6569 : PointerType::getUnqual(LoadTy));
6570 :
6571 14 : if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
6572 14 : const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL))
6573 14 : return Builder.getValue(LoadCst);
6574 : }
6575 :
6576 : // Otherwise, we have to emit the load. If the pointer is to unfoldable but
6577 : // still constant memory, the input chain can be the entry node.
6578 120 : SDValue Root;
6579 : bool ConstantMemory = false;
6580 :
6581 : // Do not serialize (non-volatile) loads of constant memory with anything.
6582 162 : if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
6583 0 : Root = Builder.DAG.getEntryNode();
6584 : ConstantMemory = true;
6585 : } else {
6586 : // Do not serialize non-volatile loads against each other.
6587 120 : Root = Builder.DAG.getRoot();
6588 : }
6589 :
6590 120 : SDValue Ptr = Builder.getValue(PtrVal);
6591 120 : SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root,
6592 : Ptr, MachinePointerInfo(PtrVal),
6593 120 : /* Alignment = */ 1);
6594 :
6595 120 : if (!ConstantMemory)
6596 120 : Builder.PendingLoads.push_back(LoadVal.getValue(1));
6597 120 : return LoadVal;
6598 : }
6599 :
6600 : /// Record the value for an instruction that produces an integer result,
6601 : /// converting the type where necessary.
6602 85 : void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
6603 : SDValue Value,
6604 : bool IsSigned) {
6605 85 : EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
6606 85 : I.getType(), true);
6607 85 : if (IsSigned)
6608 32 : Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
6609 : else
6610 138 : Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
6611 85 : setValue(&I, Value);
6612 85 : }
6613 :
6614 : /// See if we can lower a memcmp call into an optimized form. If so, return
6615 : /// true and lower it. Otherwise return false, and it will be lowered like a
6616 : /// normal call.
6617 : /// The caller already checked that \p I calls the appropriate LibFunc with a
6618 : /// correct prototype.
6619 785 : bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
6620 785 : const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
6621 : const Value *Size = I.getArgOperand(2);
6622 : const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
6623 336 : if (CSize && CSize->getZExtValue() == 0) {
6624 13 : EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
6625 13 : I.getType(), true);
6626 26 : setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
6627 : return true;
6628 : }
6629 :
6630 772 : const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6631 : std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
6632 1544 : DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
6633 2316 : getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
6634 772 : if (Res.first.getNode()) {
6635 12 : processIntegerCallValue(I, Res.first, true);
6636 12 : PendingLoads.push_back(Res.second);
6637 12 : return true;
6638 : }
6639 :
6640 : // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
6641 : // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
6642 760 : if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
6643 537 : return false;
6644 :
6645 : // If the target has a fast compare for the given size, it will return a
6646 : // preferred load type for that size. Require that the load VT is legal and
6647 : // that the target supports unaligned loads of that type. Otherwise, return
6648 : // INVALID.
6649 : auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
6650 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6651 : MVT LVT = TLI.hasFastEqualityCompare(NumBits);
6652 : if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
6653 : // TODO: Handle 5 byte compare as 4-byte + 1 byte.
6654 : // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
6655 : // TODO: Check alignment of src and dest ptrs.
6656 : unsigned DstAS = LHS->getType()->getPointerAddressSpace();
6657 : unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
6658 : if (!TLI.isTypeLegal(LVT) ||
6659 : !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
6660 : !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
6661 : LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
6662 : }
6663 :
6664 : return LVT;
6665 223 : };
6666 :
6667 : // This turns into unaligned loads. We only do this if the target natively
6668 : // supports the MVT we'll be loading or if it is small enough (<= 4) that
6669 : // we'll only produce a small number of byte loads.
6670 223 : MVT LoadVT;
6671 223 : unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
6672 223 : switch (NumBitsToCompare) {
6673 : default:
6674 : return false;
6675 : case 16:
6676 30 : LoadVT = MVT::i16;
6677 30 : break;
6678 : case 32:
6679 14 : LoadVT = MVT::i32;
6680 14 : break;
6681 47 : case 64:
6682 : case 128:
6683 : case 256:
6684 47 : LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
6685 47 : break;
6686 : }
6687 :
6688 91 : if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
6689 : return false;
6690 :
6691 67 : SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
6692 67 : SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
6693 :
6694 : // Bitcast to a wide integer type if the loads are vectors.
6695 134 : if (LoadVT.isVector()) {
6696 8 : EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
6697 8 : LoadL = DAG.getBitcast(CmpVT, LoadL);
6698 8 : LoadR = DAG.getBitcast(CmpVT, LoadR);
6699 : }
6700 :
6701 201 : SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
6702 67 : processIntegerCallValue(I, Cmp, false);
6703 67 : return true;
6704 : }
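     : // End-to-end sketch (assuming a target where i64 is legal and
     : // misaligned loads are allowed, e.g. typical x86-64):
     : //   if (memcmp(p, q, 8) == 0) ...
     : // lowers to two 8-byte loads and one comparison instead of a libcall:
     : //   (setcc ne (load i64, p), (load i64, q))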
6705 :
6706 : /// See if we can lower a memchr call into an optimized form. If so, return
6707 : /// true and lower it. Otherwise return false, and it will be lowered like a
6708 : /// normal call.
6709 : /// The caller already checked that \p I calls the appropriate LibFunc with a
6710 : /// correct prototype.
6711 112 : bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
6712 112 : const Value *Src = I.getArgOperand(0);
6713 : const Value *Char = I.getArgOperand(1);
6714 : const Value *Length = I.getArgOperand(2);
6715 :
6716 112 : const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6717 : std::pair<SDValue, SDValue> Res =
6718 224 : TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
6719 : getValue(Src), getValue(Char), getValue(Length),
6720 336 : MachinePointerInfo(Src));
6721 112 : if (Res.first.getNode()) {
6722 5 : setValue(&I, Res.first);
6723 5 : PendingLoads.push_back(Res.second);
6724 5 : return true;
6725 : }
6726 :
6727 : return false;
6728 : }
6729 :
6730 : /// See if we can lower a mempcpy call into an optimized form. If so, return
6731 : /// true and lower it. Otherwise return false, and it will be lowered like a
6732 : /// normal call.
6733 : /// The caller already checked that \p I calls the appropriate LibFunc with a
6734 : /// correct prototype.
6735 2 : bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
6736 2 : SDValue Dst = getValue(I.getArgOperand(0));
6737 2 : SDValue Src = getValue(I.getArgOperand(1));
6738 2 : SDValue Size = getValue(I.getArgOperand(2));
6739 :
6740 2 : unsigned DstAlign = DAG.InferPtrAlignment(Dst);
6741 2 : unsigned SrcAlign = DAG.InferPtrAlignment(Src);
6742 2 : unsigned Align = std::min(DstAlign, SrcAlign);
6743 2 : if (Align == 0) // Alignment of one or both could not be inferred.
6744 : Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved.
6745 :
6746 : bool isVol = false;
6747 2 : SDLoc sdl = getCurSDLoc();
6748 :
6749 : // In the mempcpy context we need to pass in a false value for isTailCall
6750 : // because the return pointer needs to be adjusted by the size of
6751 : // the copied memory.
6752 2 : SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
6753 : false, /*isTailCall=*/false,
6754 : MachinePointerInfo(I.getArgOperand(0)),
6755 2 : MachinePointerInfo(I.getArgOperand(1)));
6756 : assert(MC.getNode() != nullptr &&
6757 : "** memcpy should not be lowered as TailCall in mempcpy context **");
6758 2 : DAG.setRoot(MC);
6759 :
6760 : // Check if Size needs to be truncated or extended.
6761 4 : Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
6762 :
6763 : // Adjust return pointer to point just past the last dst byte.
6764 2 : SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
6765 2 : Dst, Size);
6766 2 : setValue(&I, DstPlusSize);
6767 2 : return true;
6768 : }
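     : // For reference: mempcpy(d, s, n) behaves like memcpy(d, s, n) except
     : // that it returns (char *)d + n instead of d, which is why the code
     : // above re-extends Size to the pointer width and returns Dst + Size.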
6769 :
6770 : /// See if we can lower a strcpy call into an optimized form. If so, return
6771 : /// true and lower it, otherwise return false and it will be lowered like a
6772 : /// normal call.
6773 : /// The caller already checked that \p I calls the appropriate LibFunc with a
6774 : /// correct prototype.
6775 151 : bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
6776 151 : const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
6777 :
6778 151 : const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6779 : std::pair<SDValue, SDValue> Res =
6780 151 : TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
6781 : getValue(Arg0), getValue(Arg1),
6782 : MachinePointerInfo(Arg0),
6783 453 : MachinePointerInfo(Arg1), isStpcpy);
6784 151 : if (Res.first.getNode()) {
6785 3 : setValue(&I, Res.first);
6786 3 : DAG.setRoot(Res.second);
6787 3 : return true;
6788 : }
6789 :
6790 : return false;
6791 : }
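     : // For reference: both routines copy the string but return different
     : // pointers, which is what the isStpcpy flag tells the target:
     : //   char buf[8];
     : //   strcpy(buf, "hi");   // returns buf
     : //   stpcpy(buf, "hi");   // returns buf + 2, the terminating NUL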
6792 :
6793 : /// See if we can lower a strcmp call into an optimized form. If so, return
6794 : /// true and lower it, otherwise return false and it will be lowered like a
6795 : /// normal call.
6796 : /// The caller already checked that \p I calls the appropriate LibFunc with a
6797 : /// correct prototype.
6798 110 : bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
6799 110 : const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
6800 :
6801 110 : const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6802 : std::pair<SDValue, SDValue> Res =
6803 220 : TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
6804 : getValue(Arg0), getValue(Arg1),
6805 : MachinePointerInfo(Arg0),
6806 330 : MachinePointerInfo(Arg1));
6807 110 : if (Res.first.getNode()) {
6808 4 : processIntegerCallValue(I, Res.first, true);
6809 4 : PendingLoads.push_back(Res.second);
6810 4 : return true;
6811 : }
6812 :
6813 : return false;
6814 : }
6815 :
6816 : /// See if we can lower a strlen call into an optimized form. If so, return
6817 : /// true and lower it, otherwise return false and it will be lowered like a
6818 : /// normal call.
6819 : /// The caller already checked that \p I calls the appropriate LibFunc with a
6820 : /// correct prototype.
6821 1195 : bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
6822 1195 : const Value *Arg0 = I.getArgOperand(0);
6823 :
6824 1195 : const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6825 : std::pair<SDValue, SDValue> Res =
6826 2390 : TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
6827 3585 : getValue(Arg0), MachinePointerInfo(Arg0));
6828 1195 : if (Res.first.getNode()) {
6829 1 : processIntegerCallValue(I, Res.first, false);
6830 1 : PendingLoads.push_back(Res.second);
6831 1 : return true;
6832 : }
6833 :
6834 : return false;
6835 : }
6836 :
6837 : /// See if we can lower a strnlen call into an optimized form. If so, return
6838 : /// true and lower it, otherwise return false and it will be lowered like a
6839 : /// normal call.
6840 : /// The caller already checked that \p I calls the appropriate LibFunc with a
6841 : /// correct prototype.
6842 2 : bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
6843 2 : const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
6844 :
6845 2 : const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
6846 : std::pair<SDValue, SDValue> Res =
6847 4 : TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
6848 : getValue(Arg0), getValue(Arg1),
6849 6 : MachinePointerInfo(Arg0));
6850 2 : if (Res.first.getNode()) {
6851 1 : processIntegerCallValue(I, Res.first, false);
6852 1 : PendingLoads.push_back(Res.second);
6853 1 : return true;
6854 : }
6855 :
6856 : return false;
6857 : }
6858 :
6859 : /// See if we can lower a unary floating-point operation into an SDNode with
6860 : /// the specified Opcode. If so, return true and lower it, otherwise return
6861 : /// false and it will be lowered like a normal call.
6862 : /// The caller already checked that \p I calls the appropriate LibFunc with a
6863 : /// correct prototype.
6864 1173 : bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
6865 : unsigned Opcode) {
6866 : // We already checked this call's prototype; verify it doesn't modify errno.
6867 1173 : if (!I.onlyReadsMemory())
6868 : return false;
6869 :
6870 559 : SDValue Tmp = getValue(I.getArgOperand(0));
6871 2236 : setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp));
6872 559 : return true;
6873 : }
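     : // e.g. sqrt(-1.0) may set errno, so a libm call can only be turned
     : // into a bare DAG node when the call site is readnone/readonly
     : // (onlyReadsMemory) and an errno write is therefore unobservable:
     : //   %r = call double @sqrt(double %x) readnone   -->   (fsqrt %x)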
6874 :
6875 : /// See if we can lower a binary floating-point operation into an SDNode with
6876 : /// the specified Opcode. If so, return true and lower it. Otherwise return
6877 : /// false, and it will be lowered like a normal call.
6878 : /// The caller already checked that \p I calls the appropriate LibFunc with a
6879 : /// correct prototype.
6880 38 : bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
6881 : unsigned Opcode) {
6882 : // We already checked this call's prototype; verify it doesn't modify errno.
6883 38 : if (!I.onlyReadsMemory())
6884 : return false;
6885 :
6886 38 : SDValue Tmp0 = getValue(I.getArgOperand(0));
6887 38 : SDValue Tmp1 = getValue(I.getArgOperand(1));
6888 38 : EVT VT = Tmp0.getValueType();
6889 114 : setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1));
6890 38 : return true;
6891 : }
6892 :
6893 1054243 : void SelectionDAGBuilder::visitCall(const CallInst &I) {
6894 : // Handle inline assembly differently.
6895 1054243 : if (isa<InlineAsm>(I.getCalledValue())) {
6896 16913 : visitInlineAsm(&I);
6897 560977 : return;
6898 : }
6899 :
6900 1037330 : MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
6901 1037330 : computeUsesVAFloatArgument(I, MMI);
6902 :
6903 : const char *RenameFn = nullptr;
6904 : if (Function *F = I.getCalledFunction()) {
6905 1028208 : if (F->isDeclaration()) {
6906 : // Is this an LLVM intrinsic or a target-specific intrinsic?
6907 734505 : unsigned IID = F->getIntrinsicID();
6908 734505 : if (!IID)
6909 191778 : if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
6910 906 : IID = II->getIntrinsicID(F);
6911 :
6912 734505 : if (IID) {
6913 543254 : RenameFn = visitIntrinsicCall(I, IID);
6914 543254 : if (!RenameFn)
6915 544064 : return;
6916 : }
6917 : }
6918 :
6919 : // Check for well-known libc/libm calls. If the function is internal, it
6920 : // can't be a library call. Don't do the check if marked as nobuiltin for
6921 : // some reason or the call site requires strict floating point semantics.
6922 : LibFunc Func;
6923 1451196 : if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
6924 967443 : F->hasName() && LibInfo->getLibFunc(*F, Func) &&
6925 11034 : LibInfo->hasOptimizedCodeGen(Func)) {
6926 3680 : switch (Func) {
6927 : default: break;
6928 112 : case LibFunc_copysign:
6929 : case LibFunc_copysignf:
6930 : case LibFunc_copysignl:
6931 : // We already checked this call's prototype; verify it doesn't modify
6932 : // errno.
6933 112 : if (I.onlyReadsMemory()) {
6934 107 : SDValue LHS = getValue(I.getArgOperand(0));
6935 107 : SDValue RHS = getValue(I.getArgOperand(1));
6936 438 : setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
6937 : LHS.getValueType(), LHS, RHS));
6938 : return;
6939 5 : }
6940 : break;
6941 85 : case LibFunc_fabs:
6942 : case LibFunc_fabsf:
6943 : case LibFunc_fabsl:
6944 85 : if (visitUnaryFloatCall(I, ISD::FABS))
6945 : return;
6946 : break;
6947 18 : case LibFunc_fmin:
6948 : case LibFunc_fminf:
6949 : case LibFunc_fminl:
6950 18 : if (visitBinaryFloatCall(I, ISD::FMINNUM))
6951 : return;
6952 : break;
6953 20 : case LibFunc_fmax:
6954 : case LibFunc_fmaxf:
6955 : case LibFunc_fmaxl:
6956 20 : if (visitBinaryFloatCall(I, ISD::FMAXNUM))
6957 : return;
6958 : break;
6959 230 : case LibFunc_sin:
6960 : case LibFunc_sinf:
6961 : case LibFunc_sinl:
6962 230 : if (visitUnaryFloatCall(I, ISD::FSIN))
6963 : return;
6964 : break;
6965 179 : case LibFunc_cos:
6966 : case LibFunc_cosf:
6967 : case LibFunc_cosl:
6968 179 : if (visitUnaryFloatCall(I, ISD::FCOS))
6969 : return;
6970 : break;
6971 390 : case LibFunc_sqrt:
6972 : case LibFunc_sqrtf:
6973 : case LibFunc_sqrtl:
6974 : case LibFunc_sqrt_finite:
6975 : case LibFunc_sqrtf_finite:
6976 : case LibFunc_sqrtl_finite:
6977 390 : if (visitUnaryFloatCall(I, ISD::FSQRT))
6978 : return;
6979 : break;
6980 69 : case LibFunc_floor:
6981 : case LibFunc_floorf:
6982 : case LibFunc_floorl:
6983 69 : if (visitUnaryFloatCall(I, ISD::FFLOOR))
6984 : return;
6985 : break;
6986 25 : case LibFunc_nearbyint:
6987 : case LibFunc_nearbyintf:
6988 : case LibFunc_nearbyintl:
6989 25 : if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
6990 : return;
6991 : break;
6992 56 : case LibFunc_ceil:
6993 : case LibFunc_ceilf:
6994 : case LibFunc_ceill:
6995 56 : if (visitUnaryFloatCall(I, ISD::FCEIL))
6996 : return;
6997 : break;
6998 25 : case LibFunc_rint:
6999 : case LibFunc_rintf:
7000 : case LibFunc_rintl:
7001 25 : if (visitUnaryFloatCall(I, ISD::FRINT))
7002 : return;
7003 : break;
7004 39 : case LibFunc_round:
7005 : case LibFunc_roundf:
7006 : case LibFunc_roundl:
7007 39 : if (visitUnaryFloatCall(I, ISD::FROUND))
7008 : return;
7009 : break;
7010 46 : case LibFunc_trunc:
7011 : case LibFunc_truncf:
7012 : case LibFunc_truncl:
7013 46 : if (visitUnaryFloatCall(I, ISD::FTRUNC))
7014 : return;
7015 : break;
7016 17 : case LibFunc_log2:
7017 : case LibFunc_log2f:
7018 : case LibFunc_log2l:
7019 17 : if (visitUnaryFloatCall(I, ISD::FLOG2))
7020 : return;
7021 : break;
7022 12 : case LibFunc_exp2:
7023 : case LibFunc_exp2f:
7024 : case LibFunc_exp2l:
7025 12 : if (visitUnaryFloatCall(I, ISD::FEXP2))
7026 : return;
7027 : break;
7028 785 : case LibFunc_memcmp:
7029 785 : if (visitMemCmpCall(I))
7030 : return;
7031 : break;
7032 2 : case LibFunc_mempcpy:
7033 2 : if (visitMemPCpyCall(I))
7034 : return;
7035 : break;
7036 112 : case LibFunc_memchr:
7037 112 : if (visitMemChrCall(I))
7038 : return;
7039 : break;
7040 150 : case LibFunc_strcpy:
7041 150 : if (visitStrCpyCall(I, false))
7042 : return;
7043 : break;
7044 1 : case LibFunc_stpcpy:
7045 1 : if (visitStrCpyCall(I, true))
7046 : return;
7047 : break;
7048 110 : case LibFunc_strcmp:
7049 110 : if (visitStrCmpCall(I))
7050 : return;
7051 : break;
7052 1195 : case LibFunc_strlen:
7053 1195 : if (visitStrLenCall(I))
7054 : return;
7055 : break;
7056 2 : case LibFunc_strnlen:
7057 2 : if (visitStrNLenCall(I))
7058 : return;
7059 : break;
7060 : }
7061 : }
7062 : }
7063 :
7064 493266 : SDValue Callee;
7065 493266 : if (!RenameFn)
7066 493264 : Callee = getValue(I.getCalledValue());
7067 : else
7068 4 : Callee = DAG.getExternalSymbol(
7069 : RenameFn,
7070 4 : DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
7071 :
7072 : // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
7073 : // have to do anything here to lower funclet bundles.
7074 : assert(!I.hasOperandBundlesOtherThan(
7075 : {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
7076 : "Cannot lower calls with arbitrary operand bundles!");
7077 :
7078 493266 : if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
7079 3 : LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
7080 : else
7081 : // Check if we can potentially perform a tail call. More detailed checking
7082 : // is done within LowerCallTo, after more information about the call is
7083 : // known.
7084 986526 : LowerCallTo(&I, Callee, I.isTailCall());
7085 : }
7086 :
7087 : namespace {
7088 :
7089 : /// AsmOperandInfo - This contains information for each constraint that we are
7090 : /// lowering.
7091 262290 : class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
7092 : public:
7093 : /// CallOperand - If this is the result output operand or a clobber
7094 : /// this is null, otherwise it is the incoming operand to the CallInst.
7095 : /// This gets modified as the asm is processed.
7096 : SDValue CallOperand;
7097 :
7098 : /// AssignedRegs - If this is a register or register class operand, this
7099 : /// contains the set of register corresponding to the operand.
7100 : RegsForValue AssignedRegs;
7101 :
7102 68582 : explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
7103 68582 : : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
7104 68582 : }
7105 :
7106 : /// Whether or not this operand accesses memory
7107 64372 : bool hasMemory(const TargetLowering &TLI) const {
7108 : // Indirect operand accesses access memory.
7109 64372 : if (isIndirect)
7110 : return true;
7111 :
7112 168522 : for (const auto &Code : Codes)
7113 214356 : if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
7114 : return true;
7115 :
7116 : return false;
7117 : }
7118 :
7119 : /// getCallOperandValEVT - Return the EVT of the Value* that this operand
7120 : /// corresponds to. If there is no Value* for this operand, it returns
7121 : /// MVT::Other.
7122 0 : EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI,
7123 : const DataLayout &DL) const {
7124 0 : if (!CallOperandVal) return MVT::Other;
7125 :
7126 0 : if (isa<BasicBlock>(CallOperandVal))
7127 0 : return TLI.getPointerTy(DL);
7128 :
7129 0 : llvm::Type *OpTy = CallOperandVal->getType();
7130 :
7131 : // FIXME: code duplicated from TargetLowering::ParseConstraints().
7132 : // If this is an indirect operand, the operand is a pointer to the
7133 : // accessed type.
7134 0 : if (isIndirect) {
7135 : PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
7136 : if (!PtrTy)
7137 0 : report_fatal_error("Indirect operand for inline asm not a pointer!");
7138 0 : OpTy = PtrTy->getElementType();
7139 : }
7140 :
7141 : // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
7142 : if (StructType *STy = dyn_cast<StructType>(OpTy))
7143 0 : if (STy->getNumElements() == 1)
7144 0 : OpTy = STy->getElementType(0);
7145 :
7146 : // If OpTy is not a single value, it may be a struct/union that we
7147 : // can tile with integers.
7148 0 : if (!OpTy->isSingleValueType() && OpTy->isSized()) {
7149 0 : unsigned BitSize = DL.getTypeSizeInBits(OpTy);
7150 0 : switch (BitSize) {
7151 : default: break;
7152 0 : case 1:
7153 : case 8:
7154 : case 16:
7155 : case 32:
7156 : case 64:
7157 : case 128:
7158 0 : OpTy = IntegerType::get(Context, BitSize);
7159 0 : break;
7160 : }
7161 : }
7162 :
7163 0 : return TLI.getValueType(DL, OpTy, true);
7164 : }
7165 : };
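     : // e.g. in getCallOperandValEVT a direct (non-indirect) aggregate that
     : // can be tiled, such as an 8-byte struct passed by value to an "r"
     : // constraint, is treated as i64, while a single-element struct like
     : // { <16 x i8> } is first unwrapped to its element type.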
7166 :
7167 : using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;
7168 :
7169 : } // end anonymous namespace
7170 :
7171 : /// Make sure that the output operand \p OpInfo and its corresponding input
7172 : /// operand \p MatchingOpInfo have compatible constraint types (otherwise error
7173 : /// out).
7174 339 : static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
7175 : SDISelAsmOperandInfo &MatchingOpInfo,
7176 : SelectionDAG &DAG) {
7177 339 : if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
7178 326 : return;
7179 :
7180 26 : const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
7181 13 : const auto &TLI = DAG.getTargetLoweringInfo();
7182 :
7183 : std::pair<unsigned, const TargetRegisterClass *> MatchRC =
7184 : TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
7185 26 : OpInfo.ConstraintVT);
7186 : std::pair<unsigned, const TargetRegisterClass *> InputRC =
7187 : TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
7188 26 : MatchingOpInfo.ConstraintVT);
7189 13 : if ((OpInfo.ConstraintVT.isInteger() !=
7190 26 : MatchingOpInfo.ConstraintVT.isInteger()) ||
7191 13 : (MatchRC.second != InputRC.second)) {
7192 : // FIXME: error out in a more elegant fashion
7193 0 : report_fatal_error("Unsupported asm: input constraint"
7194 : " with a matching output constraint of"
7195 : " incompatible type!");
7196 : }
7197 13 : MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
7198 : }
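     : // e.g. with
     : //   asm("...", "=r,0"(x))
     : // input operand 1 is tied to output 0 and must occupy the same
     : // register, so their ConstraintVTs are reconciled here; an integer
     : // output tied to a floating-point input in an incompatible register
     : // class is rejected with the fatal error above.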
7199 :
7200 : /// Get a direct memory input to behave well as an indirect operand.
7201 : /// This may introduce stores, hence the need for a \p Chain.
7202 : /// \return The (possibly updated) chain.
7203 86 : static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
7204 : SDISelAsmOperandInfo &OpInfo,
7205 : SelectionDAG &DAG) {
7206 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7207 :
7208 : // If we don't have an indirect input, put it in the constpool if we can,
7209 : // otherwise spill it to a stack slot.
7210 : // TODO: This isn't quite right. We need to handle these according to
7211 : // the addressing mode that the constraint wants. Also, this may take
7212 : // an additional register for the computation and we don't want that
7213 : // either.
7214 :
7215 : // If the operand is a float, integer, or vector constant, spill to a
7216 : // constant pool entry to get its address.
7217 86 : const Value *OpVal = OpInfo.CallOperandVal;
7218 82 : if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
7219 162 : isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
7220 10 : OpInfo.CallOperand = DAG.getConstantPool(
7221 20 : cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
7222 10 : return Chain;
7223 : }
7224 :
7225 : // Otherwise, create a stack slot and emit a store to it before the asm.
7226 76 : Type *Ty = OpVal->getType();
7227 76 : auto &DL = DAG.getDataLayout();
7228 76 : uint64_t TySize = DL.getTypeAllocSize(Ty);
7229 76 : unsigned Align = DL.getPrefTypeAlignment(Ty);
7230 76 : MachineFunction &MF = DAG.getMachineFunction();
7231 76 : int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
7232 76 : SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
7233 76 : Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
7234 76 : MachinePointerInfo::getFixedStack(MF, SSFI));
7235 76 : OpInfo.CallOperand = StackSlot;
7236 :
7237 76 : return Chain;
7238 : }
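     : // e.g. (hypothetical C) asm("movl %1, %0" : "=r"(y) : "m"(x + 1)):
     : // the direct value x + 1 has no address of its own, so it is spilled
     : // to a stack slot here and the slot's address becomes the operand;
     : // constant values take the cheaper constant-pool path above instead.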
7239 :
7240 : /// GetRegistersForValue - Assign registers (virtual or physical) for the
7241 : /// specified operand. We prefer to assign virtual registers, to allow the
7242 : /// register allocator to handle the assignment process. However, if the asm
7243 : /// uses features that we can't model on machineinstrs, we have SDISel do the
7244 : /// allocation. This produces generally horrible, but correct, code.
7245 : ///
7246 : /// OpInfo describes the operand
7247 : /// RefOpInfo describes the matching operand if any, the operand otherwise
7248 63369 : static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
7249 : const SDLoc &DL, SDISelAsmOperandInfo &OpInfo,
7250 : SDISelAsmOperandInfo &RefOpInfo) {
7251 63369 : LLVMContext &Context = *DAG.getContext();
7252 :
7253 63369 : MachineFunction &MF = DAG.getMachineFunction();
7254 : SmallVector<unsigned, 4> Regs;
7255 63369 : const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
7256 :
7257 : // If this is a constraint for a single physreg, or a constraint for a
7258 : // register class, find it.
7259 : std::pair<unsigned, const TargetRegisterClass *> PhysReg =
7260 : TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode,
7261 126738 : RefOpInfo.ConstraintVT);
7262 :
7263 : unsigned NumRegs = 1;
7264 63369 : if (OpInfo.ConstraintVT != MVT::Other) {
7265 : // If this is an FP operand in an integer register (or vice versa), or more
7266 : // generally if the operand value disagrees with the register class we plan
7267 : // to stick it in, fix the operand type.
7268 : //
7269 : // If this is an input value, the bitcast to the new type is done now.
7270 : // Bitcast for output value is done at the end of visitInlineAsm().
7271 10709 : if ((OpInfo.Type == InlineAsm::isOutput ||
7272 10709 : OpInfo.Type == InlineAsm::isInput) &&
7273 21357 : PhysReg.second &&
7274 : !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) {
7275 : // Try to convert to the first EVT that the reg class contains. If the
7276 : // types are identical size, use a bitcast to convert (e.g. two differing
7277 : // vector types). Note: output bitcast is done at the end of
7278 : // visitInlineAsm().
7279 510 : MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
7280 510 : if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
7281 : // Exclude indirect inputs while they are unsupported because the code
7282 : // to perform the load is missing and thus OpInfo.CallOperand still
7283 : // refers to the input address rather than the pointed-to value.
7284 184 : if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
7285 87 : OpInfo.CallOperand =
7286 87 : DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
7287 184 : OpInfo.ConstraintVT = RegVT;
7288 : // If the operand is an FP value and we want it in integer registers,
7289 : // use the corresponding integer type. This turns an f64 value into
7290 : // i64, which can be passed with two i32 values on a 32-bit machine.
7291 326 : } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
7292 33 : RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
7293 33 : if (OpInfo.Type == InlineAsm::isInput)
7294 13 : OpInfo.CallOperand =
7295 13 : DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
7296 33 : OpInfo.ConstraintVT = RegVT;
7297 : }
7298 : }
7299 :
7300 21418 : NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
7301 : }
7302 :
7303 : // No need to allocate a matching input constraint since the constraint it's
7304 : // matching to has already been allocated.
7305 63369 : if (OpInfo.isMatchingInputConstraint())
7306 : return;
7307 :
7308 : MVT RegVT;
7309 : EVT ValueVT = OpInfo.ConstraintVT;
7310 :
7311 : // If this is a constraint for a specific physical register, like {r17},
7312 : // assign it now.
7313 63029 : if (unsigned AssignedReg = PhysReg.first) {
7314 : const TargetRegisterClass *RC = PhysReg.second;
7315 50604 : if (OpInfo.ConstraintVT == MVT::Other)
7316 49160 : ValueVT = *TRI.legalclasstypes_begin(*RC);
7317 :
7318 : // Get the actual register value type. This is important, because the user
7319 : // may have asked for (e.g.) the AX register in i32 type. We need to
7320 : // remember that AX is actually i16 to get the right extension.
7321 50604 : RegVT = *TRI.legalclasstypes_begin(*RC);
7322 :
7323 : // This is an explicit reference to a physical register.
7324 50604 : Regs.push_back(AssignedReg);
7325 :
7326 : // If this is an expanded reference, add the rest of the regs to Regs.
7327 50604 : if (NumRegs != 1) {
7328 23 : TargetRegisterClass::iterator I = RC->begin();
7329 65 : for (; *I != AssignedReg; ++I)
7330 : assert(I != RC->end() && "Didn't find reg!");
7331 :
7332 : // Already added the first reg.
7333 23 : --NumRegs; ++I;
7334 46 : for (; NumRegs; --NumRegs, ++I) {
7335 : assert(I != RC->end() && "Ran out of registers to allocate!");
7336 23 : Regs.push_back(*I);
7337 : }
7338 : }
7339 :
7340 50604 : OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
7341 50604 : return;
7342 : }
7343 :
7344 : // Otherwise, if this was a reference to an LLVM register class, create vregs
7345 : // for this reference.
7346 12425 : if (const TargetRegisterClass *RC = PhysReg.second) {
7347 8872 : RegVT = *TRI.legalclasstypes_begin(*RC);
7348 8872 : if (OpInfo.ConstraintVT == MVT::Other)
7349 1 : ValueVT = RegVT;
7350 :
7351 : // Create the appropriate number of virtual registers.
7352 8872 : MachineRegisterInfo &RegInfo = MF.getRegInfo();
7353 17891 : for (; NumRegs; --NumRegs)
7354 9019 : Regs.push_back(RegInfo.createVirtualRegister(RC));
7355 :
7356 8872 : OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
7357 8872 : return;
7358 : }
7359 :
7360 : // Otherwise, we couldn't allocate enough registers for this.
7361 : }
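     : // e.g. a specific-register constraint like "{ax}" takes the physreg
     : // branch above, where the register class decides RegVT (AX is i16
     : // even if the user wrote an i32), while a class constraint like "r"
     : // takes the vreg branch and creates one virtual register per legal
     : // register-sized piece of the value.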
7362 :
7363 : static unsigned
7364 : findMatchingInlineAsmOperand(unsigned OperandNo,
7365 : const std::vector<SDValue> &AsmNodeOperands) {
7366 : // Scan until we find the definition we already emitted of this operand.
7367 : unsigned CurOp = InlineAsm::Op_FirstOperand;
7368 2897 : for (; OperandNo; --OperandNo) {
7369 : // Advance to the next operand.
7370 : unsigned OpFlag =
7371 7587 : cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
7372 : assert((InlineAsm::isRegDefKind(OpFlag) ||
7373 : InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
7374 : InlineAsm::isMemKind(OpFlag)) &&
7375 : "Skipped past definitions?");
7376 2529 : CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
7377 : }
7378 : return CurOp;
7379 : }
7380 :
7381 : /// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT
7382 : /// \return true if it has succeeded, false otherwise
7383 332 : static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
7384 : MVT RegVT, SelectionDAG &DAG) {
7385 332 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7386 332 : MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
7387 692 : for (unsigned i = 0, e = NumRegs; i != e; ++i) {
7388 360 : if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
7389 720 : Regs.push_back(RegInfo.createVirtualRegister(RC));
7390 : else
7391 : return false;
7392 : }
7393 : return true;
7394 : }
7395 :
7396 : namespace {
7397 :
7398 : class ExtraFlags {
7399 : unsigned Flags = 0;
7400 :
7401 : public:
7402 16914 : explicit ExtraFlags(ImmutableCallSite CS) {
7403 : const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
7404 16914 : if (IA->hasSideEffects())
7405 14413 : Flags |= InlineAsm::Extra_HasSideEffects;
7406 16914 : if (IA->isAlignStack())
7407 21 : Flags |= InlineAsm::Extra_IsAlignStack;
7408 16914 : if (CS.isConvergent())
7409 1 : Flags |= InlineAsm::Extra_IsConvergent;
7410 16914 : Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
7411 16914 : }
7412 :
7413 0 : void update(const TargetLowering::AsmOperandInfo &OpInfo) {
7414 : // Ideally, we would only check against memory constraints. However, the
7415 : // meaning of an Other constraint can be target-specific and we can't easily
7416 : // reason about it. Therefore, be conservative and set MayLoad/MayStore
7417 : // for Other constraints as well.
7418 0 : if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
7419 : OpInfo.ConstraintType == TargetLowering::C_Other) {
7420 5144 : if (OpInfo.Type == InlineAsm::isInput)
7421 4365 : Flags |= InlineAsm::Extra_MayLoad;
7422 779 : else if (OpInfo.Type == InlineAsm::isOutput)
7423 217 : Flags |= InlineAsm::Extra_MayStore;
7424 562 : else if (OpInfo.Type == InlineAsm::isClobber)
7425 562 : Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
7426 : }
7427 0 : }
7428 :
7429 0 : unsigned get() const { return Flags; }
7430 : };
7431 :
7432 : } // end anonymous namespace
7433 :
7434 : /// visitInlineAsm - Handle a call to an InlineAsm object.
7435 16914 : void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
7436 : const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
7437 :
7438 : /// ConstraintOperands - Information about all of the constraints.
7439 16028 : SDISelAsmOperandInfoVector ConstraintOperands;
7440 :
7441 16914 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7442 : TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
7443 49856 : DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
7444 :
7445 : bool hasMemory = false;
7446 :
7447 : // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
7448 16914 : ExtraFlags ExtraInfo(CS);
7449 :
7450 : unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
7451 : unsigned ResNo = 0; // ResNo - The result number of the next output.
7452 102410 : for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
7453 205746 : ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
7454 : SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
7455 :
7456 : MVT OpVT = MVT::Other;
7457 :
7458 : // Compute the value type for each operand.
7459 68582 : if (OpInfo.Type == InlineAsm::isInput ||
7460 4269 : (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
7461 22702 : OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
7462 :
7463 : // Process the call argument. BasicBlocks are labels, currently appearing
7464 : // only in asm's.
7465 11351 : if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
7466 2 : OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
7467 : } else {
7468 11349 : OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
7469 : }
7470 :
7471 : OpVT =
7472 : OpInfo
7473 11351 : .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
7474 : .getSimpleVT();
7475 : }
7476 :
7477 68582 : if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
7478 : // The return value of the call is this value. As such, there is no
7479 : // corresponding argument.
7480 : assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
7481 : if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
7482 628 : OpVT = TLI.getSimpleValueType(DAG.getDataLayout(),
7483 1256 : STy->getElementType(ResNo));
7484 : } else {
7485 : assert(ResNo == 0 && "Asm only has one result!");
7486 3384 : OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
7487 : }
7488 4012 : ++ResNo;
7489 : }
7490 :
7491 68582 : OpInfo.ConstraintVT = OpVT;
7492 :
7493 68582 : if (!hasMemory)
7494 64372 : hasMemory = OpInfo.hasMemory(TLI);
7495 :
7496 : // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
7497 : // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]?
7498 205746 : auto TargetConstraint = TargetConstraints[i];
7499 :
7500 : // Compute the constraint code and ConstraintType to use.
7501 68582 : TLI.ComputeConstraintToUse(TargetConstraint, SDValue());
7502 :
7503 68582 : ExtraInfo.update(TargetConstraint);
7504 : }
7505 :
7506 16914 : SDValue Chain, Flag;
7507 :
7508 : // We won't need to flush pending loads if this asm doesn't touch
7509 : // memory and is nonvolatile.
7510 16914 : if (hasMemory || IA->hasSideEffects())
7511 15784 : Chain = getRoot();
7512 : else
7513 1130 : Chain = DAG.getRoot();
7514 :
7515 : // Second pass over the constraints: compute which constraint option to use
7516 : // and assign registers to constraints that want a specific physreg.
7517 85496 : for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
7518 68582 : SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
7519 :
7520 : // If this is an output operand with a matching input operand, look up the
7521 : // matching input. If their types mismatch, e.g. one is an integer, the
7522 : // other is floating point, or their sizes are different, flag it as an
7523 : // error.
7524 68582 : if (OpInfo.hasMatchingInput()) {
7525 339 : SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
7526 339 : patchMatchingInput(OpInfo, Input, DAG);
7527 : }
7528 :
7529 : // Compute the constraint code and ConstraintType to use.
7530 68582 : TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
7531 :
7532 68582 : if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
7533 3669 : OpInfo.Type == InlineAsm::isClobber)
7534 : continue;
7535 :
7536 : // If this is a memory input, and if the operand is not indirect, do what we
7537 : // need to provide an address for the memory input.
7538 68020 : if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
7539 3107 : !OpInfo.isIndirect) {
7540 : assert((OpInfo.isMultipleAlternative ||
7541 : (OpInfo.Type == InlineAsm::isInput)) &&
7542 : "Can only indirectify direct input operands!");
7543 :
7544 : // Memory operands really want the address of the value.
7545 172 : Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
7546 :
7547 : // There is no longer a Value* corresponding to this operand.
7548 86 : OpInfo.CallOperandVal = nullptr;
7549 :
7550 : // It is now an indirect operand.
7551 86 : OpInfo.isIndirect = true;
7552 : }
7553 :
7554 : // If this constraint is for a specific register, allocate it before
7555 : // anything else.
7556 : SDISelAsmOperandInfo &RefOpInfo =
7557 68020 : OpInfo.isMatchingInputConstraint()
7558 375 : ? ConstraintOperands[OpInfo.getMatchedOperand()]
7559 68020 : : ConstraintOperands[i];
7560 68020 : if (RefOpInfo.ConstraintType == TargetLowering::C_Register)
7561 162675 : GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
7562 : }
7563 :
7564 : // Third pass - Loop over all of the operands, assigning virtual or physregs
7565 : // to register class operands.
7566 85496 : for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
7567 68582 : SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
7568 : SDISelAsmOperandInfo &RefOpInfo =
7569 68582 : OpInfo.isMatchingInputConstraint()
7570 375 : ? ConstraintOperands[OpInfo.getMatchedOperand()]
7571 68582 : : ConstraintOperands[i];
7572 :
7573 : // C_Register operands have already been allocated, Other/Memory don't need
7574 : // to be.
7575 68582 : if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass)
7576 27432 : GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo, RefOpInfo);
7577 : }
7578 :
7579 : // AsmNodeOperands - The operands for the ISD::INLINEASM node.
7580 : std::vector<SDValue> AsmNodeOperands;
7581 16914 : AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
7582 33828 : AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
7583 50742 : IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout())));
7584 :
7585 : // If we have a !srcloc metadata node associated with it, we want to attach
7586 : // this to the ultimately generated inline asm machineinstr. To do this, we
7587 : // pass in the third operand as this (potentially null) inline asm MDNode.
7588 16914 : const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
7589 16914 : AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
7590 :
7591 : // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
7592 : // bits as operand 3.
7593 16914 : AsmNodeOperands.push_back(DAG.getTargetConstant(
7594 67656 : ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
7595 :
7596 : // Loop over all of the inputs, copying the operand values into the
7597 : // appropriate registers and processing the output regs.
7598 16028 : RegsForValue RetValRegs;
7599 :
7600 : // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
7601 16028 : std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
7602 :
7603 85391 : for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
7604 68550 : SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
7605 :
7606 68550 : switch (OpInfo.Type) {
7607 4268 : case InlineAsm::isOutput:
7608 4268 : if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
7609 : OpInfo.ConstraintType != TargetLowering::C_Register) {
7610 : // Memory output, or 'other' output (e.g. 'X' constraint).
7611 : assert(OpInfo.isIndirect && "Memory output must be indirect operand");
7612 :
7613 : unsigned ConstraintID =
7614 434 : TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
7615 : assert(ConstraintID != InlineAsm::Constraint_Unknown &&
7616 : "Failed to convert memory constraint code to constraint id.");
7617 :
7618 : // Add information to the INLINEASM node to know about this output.
7619 : unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
7620 : OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
7621 434 : AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
7622 217 : MVT::i32));
7623 217 : AsmNodeOperands.push_back(OpInfo.CallOperand);
7624 217 : break;
7625 4051 : }
7626 :
7627 : // Otherwise, this is a register or register class output.
7628 :
7629 : // Copy the output from the appropriate register. Find a register that
7630 : // we can use.
7631 4051 : if (OpInfo.AssignedRegs.Regs.empty()) {
7632 27 : emitInlineAsmError(
7633 27 : CS, "couldn't allocate output register for constraint '" +
7634 27 : Twine(OpInfo.ConstraintCode) + "'");
7635 886 : return;
7636 : }
7637 :
7638 : // If this is an indirect operand, store through the pointer after the
7639 : // asm.
7640 4024 : if (OpInfo.isIndirect) {
7641 78 : IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
7642 : OpInfo.CallOperandVal));
7643 : } else {
7644 : // This is the result value of the call.
7645 : assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
7646 : // Concatenate this output onto the outputs list.
7647 3985 : RetValRegs.append(OpInfo.AssignedRegs);
7648 : }
7649 :
7650 : // Add information to the INLINEASM node to know that this register is
7651 : // set.
7652 : OpInfo.AssignedRegs
7653 7827 : .AddInlineAsmOperands(OpInfo.isEarlyClobber
7654 : ? InlineAsm::Kind_RegDefEarlyClobber
7655 : : InlineAsm::Kind_RegDef,
7656 4024 : false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
7657 4024 : break;
7658 :
7659 11070 : case InlineAsm::isInput: {
7660 11070 : SDValue InOperandVal = OpInfo.CallOperand;
7661 :
7662 11070 : if (OpInfo.isMatchingInputConstraint()) {
7663 : // If this is required to match an output register we have already set,
7664 : // just use its register.
7665 368 : auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
7666 : AsmNodeOperands);
7667 : unsigned OpFlag =
7668 1104 : cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
7669 368 : if (InlineAsm::isRegDefKind(OpFlag) ||
7670 : InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
7671 : // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
7672 333 : if (OpInfo.isIndirect) {
7673 : // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
7674 1 : emitInlineAsmError(CS, "inline asm not supported yet:"
7675 : " don't know how to handle tied "
7676 : "indirect register inputs");
7677 1 : return;
7678 : }
7679 :
7680 664 : MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
7681 : SmallVector<unsigned, 4> Regs;
7682 :
7683 664 : if (!createVirtualRegs(Regs,
7684 : InlineAsm::getNumOperandRegisters(OpFlag),
7685 : RegVT, DAG)) {
7686 0 : emitInlineAsmError(CS, "inline asm error: This value type register "
7687 : "class is not natively supported!");
7688 : return;
7689 : }
7690 :
7691 996 : RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
7692 :
7693 332 : SDLoc dl = getCurSDLoc();
7694 : // Use the produced MatchedRegs object to copy the input value into
     : // the freshly created virtual registers tied to the matched output.
7695 332 : MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
7696 : CS.getInstruction());
7697 332 : MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
7698 : true, OpInfo.getMatchedOperand(), dl,
7699 : DAG, AsmNodeOperands);
7700 : break;
7701 : }
7702 :
7703 : assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
7704 : assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
7705 : "Unexpected number of operands");
7706 : // Add information to the INLINEASM node to know about this input.
7707 : // See InlineAsm.h isUseOperandTiedToDef.
7708 : OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
7709 35 : OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
7710 : OpInfo.getMatchedOperand());
7711 35 : AsmNodeOperands.push_back(DAG.getTargetConstant(
7712 105 : OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
7713 70 : AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
7714 35 : break;
7715 : }
7716 :
7717 : // Treat indirect 'X' constraint as memory.
7718 10702 : if (OpInfo.ConstraintType == TargetLowering::C_Other &&
7719 1506 : OpInfo.isIndirect)
7720 1 : OpInfo.ConstraintType = TargetLowering::C_Memory;
7721 :
7722 10702 : if (OpInfo.ConstraintType == TargetLowering::C_Other) {
7723 : std::vector<SDValue> Ops;
7724 1505 : TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
7725 1505 : Ops, DAG);
7726 1505 : if (Ops.empty()) {
7727 19 : emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
7728 19 : Twine(OpInfo.ConstraintCode) + "'");
7729 : return;
7730 : }
7731 :
7732 : // Add information to the INLINEASM node to know about this input.
7733 : unsigned ResOpType =
7734 1486 : InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
7735 1486 : AsmNodeOperands.push_back(DAG.getTargetConstant(
7736 5944 : ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
7737 1486 : AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
7738 : break;
7739 : }
7740 :
7741 9197 : if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
7742 : assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
7743 : assert(InOperandVal.getValueType() ==
7744 : TLI.getPointerTy(DAG.getDataLayout()) &&
7745 : "Memory operands expect pointer values");
7746 :
7747 : unsigned ConstraintID =
7748 5788 : TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
7749 : assert(ConstraintID != InlineAsm::Constraint_Unknown &&
7750 : "Failed to convert memory constraint code to constraint id.");
7751 :
7752 : // Add information to the INLINEASM node to know about this input.
7753 : unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
7754 : ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
7755 2894 : AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
7756 2894 : getCurSDLoc(),
7757 2894 : MVT::i32));
7758 2894 : AsmNodeOperands.push_back(InOperandVal);
7759 2894 : break;
7760 : }
7761 :
7762 : assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
7763 : OpInfo.ConstraintType == TargetLowering::C_Register) &&
7764 : "Unknown constraint type!");
7765 :
7766 : // TODO: Support this.
7767 6303 : if (OpInfo.isIndirect) {
7768 2 : emitInlineAsmError(
7769 : CS, "Don't know how to handle indirect register inputs yet "
7770 2 : "for constraint '" +
7771 2 : Twine(OpInfo.ConstraintCode) + "'");
7772 2 : return;
7773 : }
7774 :
7775 : // Copy the input into the appropriate registers.
7776 6301 : if (OpInfo.AssignedRegs.Regs.empty()) {
7777 24 : emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" +
7778 24 : Twine(OpInfo.ConstraintCode) + "'");
7779 24 : return;
7780 : }
7781 :
7782 6277 : SDLoc dl = getCurSDLoc();
7783 :
7784 6277 : OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl,
7785 : Chain, &Flag, CS.getInstruction());
7786 :
7787 6277 : OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
7788 : dl, DAG, AsmNodeOperands);
7789 : break;
7790 : }
7791 53212 : case InlineAsm::isClobber:
7792 : // Add the clobbered value to the operand list, so that the register
7793 : // allocator is aware that the physreg got clobbered.
7794 53212 : if (!OpInfo.AssignedRegs.Regs.empty())
7795 49157 : OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
7796 98314 : false, 0, getCurSDLoc(), DAG,
7797 : AsmNodeOperands);
7798 : break;
7799 : }
7800 : }
7801 :
7802 : // Finish up input operands. Set the input chain and add the flag last.
7803 16841 : AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
7804 16841 : if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
7805 :
7806 33682 : Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(),
7807 33682 : DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
7808 16841 : Flag = Chain.getValue(1);
7809 :
7810 : // If this asm returns a register value, copy the result from that register
7811 : // and set it as the value of the call.
7812 16841 : if (!RetValRegs.Regs.empty()) {
7813 7094 : SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
7814 3547 : Chain, &Flag, CS.getInstruction());
7815 :
7816 3547 : llvm::Type *CSResultType = CS.getType();
7817 : unsigned numRet;
7818 : ArrayRef<Type *> ResultTypes;
7819 3547 : SmallVector<SDValue, 1> ResultValues(1);
7820 3547 : if (CSResultType->isSingleValueType()) {
7821 : numRet = 1;
7822 3343 : ResultValues[0] = Val;
7823 : ResultTypes = makeArrayRef(CSResultType);
7824 : } else {
7825 204 : numRet = CSResultType->getNumContainedTypes();
7826 : assert(Val->getNumOperands() == numRet &&
7827 : "Mismatch in number of output operands in asm result");
7828 : ResultTypes = CSResultType->subtypes();
7829 : ArrayRef<SDUse> ValueUses = Val->ops();
7830 204 : ResultValues.resize(numRet);
7831 : std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
7832 626 : [](const SDUse &u) -> SDValue { return u.get(); });
7833 : }
7834 7094 : SmallVector<EVT, 1> ResultVTs(numRet);
7835 7516 : for (unsigned i = 0; i < numRet; i++) {
7836 7938 : EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]);
7837 3969 : SDValue Val = ResultValues[i];
7838 : assert(ResultTypes[i]->isSized() && "Unexpected unsized type");
7839 : // If the type of the inline asm call site return value is different but
7840 : // has same size as the type of the asm output bitcast it. One example
7841 : // of this is for vectors with different width / number of elements.
7842 : // This can happen for register classes that can contain multiple
7843 : // different value types. The preg or vreg allocated may not have the
7844 : // same VT as was expected.
7845 : //
7846 : // This can also happen for a return value that disagrees with the
7847 : // register class it is put in, e.g. a double in a general-purpose
7848 : // register on a 32-bit machine.
7849 8054 : if (ResultVT != Val.getValueType() &&
7850 116 : ResultVT.getSizeInBits() == Val.getValueSizeInBits())
7851 348 : Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val);
7852 3853 : else if (ResultVT != Val.getValueType() && ResultVT.isInteger() &&
7853 3853 : Val.getValueType().isInteger()) {
7854 : // If a result value was tied to an input value, the computed result
7855 : // may have a wider width than the expected result. Extract the
7856 : // relevant portion.
7857 0 : Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val);
7858 : }
7859 :
7860 : assert(ResultVT == Val.getValueType() && "Asm result value mismatch!");
7861 3969 : ResultVTs[i] = ResultVT;
7862 3969 : ResultValues[i] = Val;
7863 : }
7864 :
7865 7094 : Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
7866 10641 : DAG.getVTList(ResultVTs), ResultValues);
7867 3547 : setValue(CS.getInstruction(), Val);
7868 : // Don't need to use this as a chain in this case.
7869 3547 : if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
7870 : return;
7871 : }
7872 :
7873 : std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
7874 :
7875 : // Process indirect outputs, first output all of the flagged copies out of
7876 : // physregs.
7877 32092 : for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
7878 36 : RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
7879 36 : const Value *Ptr = IndirectStoresToEmit[i].second;
7880 72 : SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
7881 36 : Chain, &Flag, IA);
7882 36 : StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
7883 : }
7884 :
7885 : // Emit the non-flagged stores from the physregs.
7886 : SmallVector<SDValue, 8> OutChains;
7887 32092 : for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
7888 108 : SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first,
7889 : getValue(StoresToEmit[i].second),
7890 144 : MachinePointerInfo(StoresToEmit[i].second));
7891 36 : OutChains.push_back(Val);
7892 : }
7893 :
7894 16028 : if (!OutChains.empty())
7895 99 : Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
7896 :
7897 16028 : DAG.setRoot(Chain);
7898 : }
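     : // e.g. an indirect register output written in IR as
     : //   call void asm "...", "=*r"(i32* %p)
     : // flows through IndirectStoresToEmit: the value is copied out of its
     : // register after the INLINEASM node, stored through %p, and the store
     : // chains are merged by the TokenFactor into the new root above.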
7899 :
7900 73 : void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
7901 : const Twine &Message) {
7902 73 : LLVMContext &Ctx = *DAG.getContext();
7903 73 : Ctx.emitError(CS.getInstruction(), Message);
7904 :
7905 : // Make sure we leave the DAG in a valid state
7906 73 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7907 : SmallVector<EVT, 1> ValueVTs;
7908 73 : ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
7909 :
7910 73 : if (ValueVTs.empty())
7911 : return;
7912 :
7913 : SmallVector<SDValue, 1> Ops;
7914 85 : for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
7915 86 : Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
7916 :
7917 126 : setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc()));
7918 : }
7919 :
7920 260 : void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
7921 520 : DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
7922 : MVT::Other, getRoot(),
7923 : getValue(I.getArgOperand(0)),
7924 780 : DAG.getSrcValue(I.getArgOperand(0))));
7925 260 : }
7926 :
7927 184 : void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
7928 184 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7929 184 : const DataLayout &DL = DAG.getDataLayout();
7930 368 : SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()),
7931 184 : getCurSDLoc(), getRoot(), getValue(I.getOperand(0)),
7932 184 : DAG.getSrcValue(I.getOperand(0)),
7933 736 : DL.getABITypeAlignment(I.getType()));
7934 184 : setValue(&I, V);
7935 184 : DAG.setRoot(V.getValue(1));
7936 184 : }
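     : // e.g. for
     : //   %v = va_arg i8** %ap, i32
     : // the VAARG node yields both the i32 value (result 0) and an updated
     : // chain (result 1); making that chain the new root keeps successive
     : // va_arg reads of the same list correctly ordered.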
7937 :
7938 194 : void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
7939 388 : DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
7940 : MVT::Other, getRoot(),
7941 : getValue(I.getArgOperand(0)),
7942 582 : DAG.getSrcValue(I.getArgOperand(0))));
7943 194 : }
7944 :
7945 21 : void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
7946 21 : DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
7947 : MVT::Other, getRoot(),
7948 : getValue(I.getArgOperand(0)),
7949 : getValue(I.getArgOperand(1)),
7950 21 : DAG.getSrcValue(I.getArgOperand(0)),
7951 105 : DAG.getSrcValue(I.getArgOperand(1))));
7952 21 : }
7953 :
7954 423841 : SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
7955 : const Instruction &I,
7956 : SDValue Op) {
7957 : const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
7958 26590 : if (!Range)
7959 420295 : return Op;
7960 :
7961 7092 : ConstantRange CR = getConstantRangeFromMetadata(*Range);
7962 3546 : if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
7963 0 : return Op;
7964 :
7965 3546 : APInt Lo = CR.getUnsignedMin();
7966 3546 : if (!Lo.isMinValue())
7967 1 : return Op;
7968 :
7969 3545 : APInt Hi = CR.getUnsignedMax();
7970 : unsigned Bits = Hi.getActiveBits();
7971 :
7972 3545 : EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
7973 :
7974 3545 : SDLoc SL = getCurSDLoc();
7975 :
7976 : SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
7977 3545 : DAG.getValueType(SmallVT));
7978 3545 : unsigned NumVals = Op.getNode()->getNumValues();
7979 3545 : if (NumVals == 1)
7980 3541 : return ZExt;
7981 :
7982 : SmallVector<SDValue, 4> Ops;
7983 :
7984 4 : Ops.push_back(ZExt);
7985 12 : for (unsigned I = 1; I != NumVals; ++I)
7986 8 : Ops.push_back(Op.getValue(I));
7987 :
7988 4 : return DAG.getMergeValues(Ops, SL);
7989 : }
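 :
 : // For illustration, given a call carrying range metadata such as
 : //   %v = call i32 @f(), !range !{i32 0, i32 256}
 : // the range is [0, 256), so Lo is 0, Hi is 255 and Hi.getActiveBits() is
 : // 8; the code above then wraps the result in roughly
 : //   (AssertZext i32 %v, ValueType:i8).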
7990 :
7991 : /// Populate a CallLoweringInfo (into \p CLI) based on the properties of
7992 : /// the call being lowered.
7993 : ///
7994 : /// This is a helper for lowering intrinsics that follow a target calling
7995 : /// convention or require stack pointer adjustment. Only a subset of the
7996 : /// intrinsic's operands need to participate in the calling convention.
7997 216 : void SelectionDAGBuilder::populateCallLoweringInfo(
7998 : TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS,
7999 : unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
8000 : bool IsPatchPoint) {
8001 : TargetLowering::ArgListTy Args;
8002 216 : Args.reserve(NumArgs);
8003 :
8004 : // Populate the argument list.
8005 : // Attributes for args start at offset 1, after the return attribute.
8006 413 : for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
8007 413 : ArgI != ArgE; ++ArgI) {
8008 197 : const Value *V = CS->getOperand(ArgI);
8009 :
8010 : assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
8011 :
8012 : TargetLowering::ArgListEntry Entry;
8013 197 : Entry.Node = getValue(V);
8014 197 : Entry.Ty = V->getType();
8015 197 : Entry.setAttributes(&CS, ArgI);
8016 197 : Args.push_back(Entry);
8017 : }
8018 :
8019 432 : CLI.setDebugLoc(getCurSDLoc())
8020 216 : .setChain(getRoot())
8021 216 : .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args))
8022 216 : .setDiscardResult(CS->use_empty())
8023 : .setIsPatchPoint(IsPatchPoint);
8024 216 : }
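 :
 : // For instance, visitPatchpoint below calls this with ArgIdx pointing past
 : // the <id>, <numBytes>, <target> and <numArgs> meta operands, so only the
 : // real call arguments participate in the calling convention.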
8025 :
8026 : /// Add a stack map intrinsic call's live variable operands to a stackmap
8027 : /// or patchpoint target node's operand list.
8028 : ///
8029 : /// Constants are converted to TargetConstants purely as an optimization to
8030 : /// avoid constant materialization and register allocation.
8031 : ///
8032 : /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
8033 : /// generate address computation nodes, and so ExpandISelPseudo can convert the
8034 : /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
8035 : /// address materialization and register allocation, but may also be required
8036 : /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
8037 : /// alloca in the entry block, then the runtime may assume that the alloca's
8038 : /// StackMap location can be read immediately after compilation and that the
8039 : /// location is valid at any point during execution (this is similar to the
8040 : /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
8041 : /// only available in a register, then the runtime would need to trap when
8042 : /// execution reaches the StackMap in order to read the alloca's location.
8043 286 : static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
8044 : const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
8045 : SelectionDAGBuilder &Builder) {
8046 671 : for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
8047 385 : SDValue OpVal = Builder.getValue(CS.getArgument(i));
8048 : if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
8049 116 : Ops.push_back(
8050 58 : Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
8051 58 : Ops.push_back(
8052 116 : Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
8053 : } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
8054 21 : const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
8055 21 : Ops.push_back(Builder.DAG.getTargetFrameIndex(
8056 21 : FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout())));
8057 : } else
8058 306 : Ops.push_back(OpVal);
8059 : }
8060 286 : }
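 :
 : // As a rough sketch, live operands (i64 42, an entry-block alloca, %x)
 : // would be appended as:
 : //   TargetConstant<StackMaps::ConstantOp>, TargetConstant<42>, ; constant
 : //   TargetFrameIndex<FI#n>,                                    ; alloca
 : //   <node for %x>                                              ; anything else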
8061 :
8062 : /// Lower llvm.experimental.stackmap directly to its target opcode.
8063 140 : void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
8064 : // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>,
8065 : // [live variables...])
8066 :
8067 : assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
8068 :
8069 140 : SDValue Chain, InFlag, Callee, NullPtr;
8070 : SmallVector<SDValue, 32> Ops;
8071 :
8072 140 : SDLoc DL = getCurSDLoc();
8073 140 : Callee = getValue(CI.getCalledValue());
8074 140 : NullPtr = DAG.getIntPtrConstant(0, DL, true);
8075 :
8076 : // The stackmap intrinsic only records the live variables (the arguments
8077 : // passed to it) and emits NOPs (if requested). Unlike the patchpoint
8078 : // intrinsic, this won't be lowered to a function call. This means we don't
8079 : // have to worry about calling conventions and target specific lowering code.
8080 : // Instead we perform the call lowering right here.
8081 : //
8082 : // chain, flag = CALLSEQ_START(chain, 0, 0)
8083 : // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
8084 : // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
8085 : //
8086 140 : Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
8087 140 : InFlag = Chain.getValue(1);
8088 :
8089 : // Add the <id> and <numBytes> constants.
8090 140 : SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
8091 280 : Ops.push_back(DAG.getTargetConstant(
8092 280 : cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
8093 140 : SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
8094 280 : Ops.push_back(DAG.getTargetConstant(
8095 : cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
8096 280 : MVT::i32));
8097 :
8098 : // Push live variables for the stack map.
8099 140 : addStackMapLiveVars(&CI, 2, DL, Ops, *this);
8100 :
8101 : // We are not pushing any register mask info on the operand list here,
8102 : // because the stackmap doesn't clobber anything.
8103 :
8104 : // Push the chain and the glue flag.
8105 140 : Ops.push_back(Chain);
8106 140 : Ops.push_back(InFlag);
8107 :
8108 : // Create the STACKMAP node.
8109 280 : SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8110 280 : SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
8111 140 : Chain = SDValue(SM, 0);
8112 140 : InFlag = Chain.getValue(1);
8113 :
8114 140 : Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
8115 :
8116 : // Stackmaps don't generate values, so nothing goes into the NodeMap.
8117 :
8118 : // Set the root to the target-lowered call chain.
8119 140 : DAG.setRoot(Chain);
8120 :
8121 : // Inform the Frame Information that we have a stackmap in this function.
8122 140 : FuncInfo.MF->getFrameInfo().setHasStackMap();
8123 140 : }
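 :
 : // A typical use, for illustration:
 : //   call void @llvm.experimental.stackmap(i64 42, i32 8, i32 %x)
 : // records the location of %x under ID 42 and reserves an 8-byte shadow at
 : // the call site (padded with nops as needed).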
8124 :
8125 : /// Lower llvm.experimental.patchpoint directly to its target opcode.
8126 146 : void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
8127 : const BasicBlock *EHPadBB) {
8128 : // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
8129 : // i32 <numBytes>,
8130 : // i8* <target>,
8131 : // i32 <numArgs>,
8132 : // [Args...],
8133 : // [live variables...])
8134 :
8135 : CallingConv::ID CC = CS.getCallingConv();
8136 146 : bool IsAnyRegCC = CC == CallingConv::AnyReg;
8137 146 : bool HasDef = !CS->getType()->isVoidTy();
8138 146 : SDLoc dl = getCurSDLoc();
8139 292 : SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
8140 :
8141 : // Handle immediate and symbolic callees.
8142 : if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
8143 141 : Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
8144 282 : /*isTarget=*/true);
8145 : else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
8146 10 : Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
8147 5 : SDLoc(SymbolicCallee),
8148 5 : SymbolicCallee->getValueType(0));
8149 :
8150 : // Get the real number of arguments participating in the call <numArgs>
8151 146 : SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos));
8152 146 : unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
8153 :
8154 : // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
8155 : // Intrinsics include all meta-operands up to but not including CC.
8156 : unsigned NumMetaOpers = PatchPointOpers::CCPos;
8157 : assert(CS.arg_size() >= NumMetaOpers + NumArgs &&
8158 : "Not enough arguments provided to the patchpoint intrinsic");
8159 :
8160 : // For AnyRegCC the arguments are lowered later on manually.
8161 146 : unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
8162 : Type *ReturnTy =
8163 146 : IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
8164 :
8165 292 : TargetLowering::CallLoweringInfo CLI(DAG);
8166 146 : populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
8167 : true);
8168 146 : std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
8169 :
8170 146 : SDNode *CallEnd = Result.second.getNode();
8171 146 : if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
8172 34 : CallEnd = CallEnd->getOperand(0).getNode();
8173 :
8174 : // Get a call instruction from the call sequence chain.
8175 : // Tail calls are not allowed.
8176 : assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
8177 : "Expected a callseq node.");
8178 146 : SDNode *Call = CallEnd->getOperand(0).getNode();
8179 : bool HasGlue = Call->getGluedNode();
8180 :
8181 : // Replace the target specific call node with the patchable intrinsic.
8182 : SmallVector<SDValue, 8> Ops;
8183 :
8184 : // Add the <id> and <numBytes> constants.
8185 146 : SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos));
8186 292 : Ops.push_back(DAG.getTargetConstant(
8187 292 : cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
8188 146 : SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos));
8189 146 : Ops.push_back(DAG.getTargetConstant(
8190 : cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
8191 292 : MVT::i32));
8192 :
8193 : // Add the callee.
8194 146 : Ops.push_back(Callee);
8195 :
8196 : // Adjust <numArgs> to account for any arguments that have been passed on the
8197 : // stack instead.
8198 : // Call Node: Chain, Target, {Args}, RegMask, [Glue]
8199 292 : unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
8200 146 : NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
8201 146 : Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
8202 :
8203 : // Add the calling convention
8204 146 : Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
8205 :
8206 : // Add the arguments we omitted previously. The register allocator should
8207 : // place these in any free register.
8208 146 : if (IsAnyRegCC)
8209 329 : for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
8210 263 : Ops.push_back(getValue(CS.getArgument(i)));
8211 :
8212 : // Push the arguments from the call instruction up to the register mask.
8213 146 : SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
8214 146 : Ops.append(Call->op_begin() + 2, e);
8215 :
8216 : // Push live variables for the stack map.
8217 146 : addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this);
8218 :
8219 : // Push the register mask info.
8220 146 : if (HasGlue)
8221 116 : Ops.push_back(*(Call->op_end()-2));
8222 : else
8223 176 : Ops.push_back(*(Call->op_end()-1));
8224 :
8225 : // Push the chain (this is originally the first operand of the call, but
8226 : // now becomes the last or second-to-last operand).
8227 292 : Ops.push_back(*(Call->op_begin()));
8228 :
8229 : // Push the glue flag (last operand).
8230 146 : if (HasGlue)
8231 116 : Ops.push_back(*(Call->op_end()-1));
8232 :
8233 : SDVTList NodeTys;
8234 146 : if (IsAnyRegCC && HasDef) {
8235 : // Create the return types based on the intrinsic definition
8236 47 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8237 : SmallVector<EVT, 3> ValueVTs;
8238 47 : ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs);
8239 : assert(ValueVTs.size() == 1 && "Expected only one return value type.");
8240 :
8241 : // There is always a chain and a glue type at the end
8242 47 : ValueVTs.push_back(MVT::Other);
8243 47 : ValueVTs.push_back(MVT::Glue);
8244 94 : NodeTys = DAG.getVTList(ValueVTs);
8245 : } else
8246 198 : NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8247 :
8248 : // Replace the target specific call node with a PATCHPOINT node.
8249 292 : MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT,
8250 : dl, NodeTys, Ops);
8251 :
8252 : // Update the NodeMap.
8253 146 : if (HasDef) {
8254 81 : if (IsAnyRegCC)
8255 47 : setValue(CS.getInstruction(), SDValue(MN, 0));
8256 : else
8257 34 : setValue(CS.getInstruction(), Result.first);
8258 : }
8259 :
8260 : // Fixup the consumers of the intrinsic. The chain and glue may be used in the
8261 : // call sequence. Furthermore the location of the chain and glue can change
8262 : // when the AnyReg calling convention is used and the intrinsic returns a
8263 : // value.
8264 146 : if (IsAnyRegCC && HasDef) {
8265 : SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
8266 : SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)};
8267 47 : DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
8268 : } else
8269 99 : DAG.ReplaceAllUsesWith(Call, MN);
8270 146 : DAG.DeleteNode(Call);
8271 :
8272 : // Inform the Frame Information that we have a patchpoint in this function.
8273 146 : FuncInfo.MF->getFrameInfo().setHasPatchPoint();
8274 146 : }
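 :
 : // After the rewrite above, the PATCHPOINT node's operands are, roughly in
 : // order:
 : //   <id>, <numBytes>, <callee>, <numArgs>, <cc>,
 : //   [anyreg args...], [call register args...], [live variables...],
 : //   <register mask>, <chain>, [glue]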
8275 :
8276 58 : void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
8277 : unsigned Intrinsic) {
8278 58 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8279 58 : SDValue Op1 = getValue(I.getArgOperand(0));
8280 58 : SDValue Op2;
8281 58 : if (I.getNumArgOperands() > 1)
8282 0 : Op2 = getValue(I.getArgOperand(1));
8283 58 : SDLoc dl = getCurSDLoc();
8284 58 : EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
8285 : SDValue Res;
8286 : FastMathFlags FMF;
8287 58 : if (isa<FPMathOperator>(I))
8288 4 : FMF = I.getFastMathFlags();
8289 :
8290 58 : switch (Intrinsic) {
8291 : case Intrinsic::experimental_vector_reduce_fadd:
8292 0 : if (FMF.isFast())
8293 0 : Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
8294 : else
8295 0 : Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
8296 : break;
8297 : case Intrinsic::experimental_vector_reduce_fmul:
8298 0 : if (FMF.isFast())
8299 0 : Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
8300 : else
8301 0 : Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
8302 : break;
8303 14 : case Intrinsic::experimental_vector_reduce_add:
8304 28 : Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
8305 14 : break;
8306 0 : case Intrinsic::experimental_vector_reduce_mul:
8307 0 : Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
8308 0 : break;
8309 0 : case Intrinsic::experimental_vector_reduce_and:
8310 0 : Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
8311 0 : break;
8312 0 : case Intrinsic::experimental_vector_reduce_or:
8313 0 : Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
8314 0 : break;
8315 0 : case Intrinsic::experimental_vector_reduce_xor:
8316 0 : Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
8317 0 : break;
8318 10 : case Intrinsic::experimental_vector_reduce_smax:
8319 20 : Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
8320 10 : break;
8321 10 : case Intrinsic::experimental_vector_reduce_smin:
8322 20 : Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
8323 10 : break;
8324 10 : case Intrinsic::experimental_vector_reduce_umax:
8325 20 : Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
8326 10 : break;
8327 10 : case Intrinsic::experimental_vector_reduce_umin:
8328 20 : Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
8329 10 : break;
8330 2 : case Intrinsic::experimental_vector_reduce_fmax:
8331 4 : Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1);
8332 2 : break;
8333 2 : case Intrinsic::experimental_vector_reduce_fmin:
8334 4 : Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1);
8335 2 : break;
8336 0 : default:
8337 0 : llvm_unreachable("Unhandled vector reduce intrinsic");
8338 : }
8339 58 : setValue(&I, Res);
8340 58 : }
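 :
 : // For example, a reduction call along the lines of
 : //   %r = call i32 @llvm.experimental.vector.reduce.add(<4 x i32> %v)
 : // lowers to a single (VECREDUCE_ADD i32 %v) node here; only the fadd and
 : // fmul forms take the extra scalar accumulator operand.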
8341 :
8342 : /// Returns an AttributeList representing the attributes applied to the return
8343 : /// value of the given call.
8344 1005206 : static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
8345 : SmallVector<Attribute::AttrKind, 2> Attrs;
8346 1005206 : if (CLI.RetSExt)
8347 3331 : Attrs.push_back(Attribute::SExt);
8348 1005206 : if (CLI.RetZExt)
8349 54176 : Attrs.push_back(Attribute::ZExt);
8350 1005206 : if (CLI.IsInReg)
8351 234 : Attrs.push_back(Attribute::InReg);
8352 :
8353 1005206 : return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
8354 1005206 : Attrs);
8355 : }
8356 :
8357 : /// TargetLowering::LowerCallTo - This is the default LowerCallTo
8358 : /// implementation, which just calls LowerCall.
8359 : /// FIXME: When all targets are migrated to using LowerCall, this hook
8360 : /// should be integrated into SDISel.
8361 : std::pair<SDValue, SDValue>
8362 1005206 : TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
8363 : // Handle the incoming return values from the call.
8364 : CLI.Ins.clear();
8365 1005206 : Type *OrigRetTy = CLI.RetTy;
8366 : SmallVector<EVT, 4> RetTys;
8367 : SmallVector<uint64_t, 4> Offsets;
8368 1005206 : auto &DL = CLI.DAG.getDataLayout();
8369 1005206 : ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
8370 :
8371 1005206 : if (CLI.IsPostTypeLegalization) {
8372 : // If we are lowering a libcall after legalization, split the return type.
8373 : SmallVector<EVT, 4> OldRetTys = std::move(RetTys);
8374 : SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets);
8375 10206 : for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
8376 5103 : EVT RetVT = OldRetTys[i];
8377 5103 : uint64_t Offset = OldOffsets[i];
8378 5103 : MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
8379 5103 : unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
8380 5103 : unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
8381 5103 : RetTys.append(NumRegs, RegisterVT);
8382 10222 : for (unsigned j = 0; j != NumRegs; ++j)
8383 5119 : Offsets.push_back(Offset + j * RegisterVTByteSZ);
8384 : }
8385 : }
8386 :
8387 : SmallVector<ISD::OutputArg, 4> Outs;
8388 1005206 : GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
8389 :
8390 : bool CanLowerReturn =
8391 2010412 : this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
8392 1005206 : CLI.IsVarArg, Outs, CLI.RetTy->getContext());
8393 :
8394 : SDValue DemoteStackSlot;
8395 : int DemoteStackIdx = -100;
8396 1005204 : if (!CanLowerReturn) {
8397 : // FIXME: equivalent assert?
8398 : // assert(!CS.hasInAllocaArgument() &&
8399 : // "sret demotion is incompatible with inalloca");
8400 209 : uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
8401 209 : unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
8402 209 : MachineFunction &MF = CLI.DAG.getMachineFunction();
8403 209 : DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
8404 209 : Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
8405 : DL.getAllocaAddrSpace());
8406 :
8407 209 : DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
8408 : ArgListEntry Entry;
8409 209 : Entry.Node = DemoteStackSlot;
8410 209 : Entry.Ty = StackSlotPtrType;
8411 : Entry.IsSExt = false;
8412 : Entry.IsZExt = false;
8413 : Entry.IsInReg = false;
8414 209 : Entry.IsSRet = true;
8415 : Entry.IsNest = false;
8416 : Entry.IsByVal = false;
8417 : Entry.IsReturned = false;
8418 : Entry.IsSwiftSelf = false;
8419 : Entry.IsSwiftError = false;
8420 209 : Entry.Alignment = Align;
8421 209 : CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
8422 209 : CLI.NumFixedArgs += 1;
8423 209 : CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
8424 :
8425 : // sret demotion isn't compatible with tail-calls, since the sret argument
8426 : // points into the caller's stack frame.
8427 209 : CLI.IsTailCall = false;
8428 : } else {
8429 1413398 : for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
8430 408403 : EVT VT = RetTys[I];
8431 408403 : MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
8432 408403 : CLI.CallConv, VT);
8433 408403 : unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
8434 408403 : CLI.CallConv, VT);
8435 819031 : for (unsigned i = 0; i != NumRegs; ++i) {
8436 : ISD::InputArg MyFlags;
8437 410628 : MyFlags.VT = RegisterVT;
8438 410628 : MyFlags.ArgVT = VT;
8439 410628 : MyFlags.Used = CLI.IsReturnValueUsed;
8440 410628 : if (CLI.RetSExt)
8441 : MyFlags.Flags.setSExt();
8442 410628 : if (CLI.RetZExt)
8443 : MyFlags.Flags.setZExt();
8444 410628 : if (CLI.IsInReg)
8445 : MyFlags.Flags.setInReg();
8446 410628 : CLI.Ins.push_back(MyFlags);
8447 : }
8448 : }
8449 : }
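 :
 : // To illustrate the demotion path above: when CanLowerReturn rejects a
 : // large aggregate return, the call is rewritten roughly from
 : //   %s = call %big @f(...)
 : // into
 : //   %slot = alloca %big
 : //   call void @f(%big* sret %slot, ...)
 : // with the result reloaded from the stack slot further down.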
8450 :
8451 : // We push in swifterror return as the last element of CLI.Ins.
8452 : ArgListTy &Args = CLI.getArgs();
8453 1005204 : if (supportSwiftError()) {
8454 3973627 : for (unsigned i = 0, e = Args.size(); i != e; ++i) {
8455 4150042 : if (Args[i].IsSwiftError) {
8456 : ISD::InputArg MyFlags;
8457 112 : MyFlags.VT = getPointerTy(DL);
8458 112 : MyFlags.ArgVT = EVT(getPointerTy(DL));
8459 : MyFlags.Flags.setSwiftError();
8460 112 : CLI.Ins.push_back(MyFlags);
8461 : }
8462 : }
8463 : }
8464 :
8465 : // Handle all of the outgoing arguments.
8466 : CLI.Outs.clear();
8467 : CLI.OutVals.clear();
8468 4178412 : for (unsigned i = 0, e = Args.size(); i != e; ++i) {
8469 : SmallVector<EVT, 4> ValueVTs;
8470 4336008 : ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
8471 : // FIXME: Split arguments if CLI.IsPostTypeLegalization
8472 2168004 : Type *FinalType = Args[i].Ty;
8473 2168004 : if (Args[i].IsByVal)
8474 2960 : FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
8475 2168004 : bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
8476 2168004 : FinalType, CLI.CallConv, CLI.IsVarArg);
8477 4337425 : for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
8478 : ++Value) {
8479 2169421 : EVT VT = ValueVTs[Value];
8480 2169421 : Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
8481 : SDValue Op = SDValue(Args[i].Node.getNode(),
8482 4338842 : Args[i].Node.getResNo() + Value);
8483 : ISD::ArgFlagsTy Flags;
8484 :
8485 : // Certain targets (such as MIPS) may have a different ABI alignment
8486 : // for a type depending on the context. Give the target a chance to
8487 : // specify the alignment it wants.
8488 2169421 : unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
8489 :
8490 4338842 : if (Args[i].IsZExt)
8491 : Flags.setZExt();
8492 2169421 : if (Args[i].IsSExt)
8493 : Flags.setSExt();
8494 2169421 : if (Args[i].IsInReg) {
8495 : // If we are using the vectorcall calling convention, a structure that
8496 : // is passed InReg is surely an HVA.
8497 237 : if (CLI.CallConv == CallingConv::X86_VectorCall &&
8498 : isa<StructType>(FinalType)) {
8499 : // The first value of the structure is marked as HvaStart.
8500 8 : if (0 == Value)
8501 : Flags.setHvaStart();
8502 : Flags.setHva();
8503 : }
8504 : // Set InReg Flag
8505 : Flags.setInReg();
8506 : }
8507 2169421 : if (Args[i].IsSRet)
8508 : Flags.setSRet();
8509 2169421 : if (Args[i].IsSwiftSelf)
8510 : Flags.setSwiftSelf();
8511 2169421 : if (Args[i].IsSwiftError)
8512 : Flags.setSwiftError();
8513 2169421 : if (Args[i].IsByVal)
8514 : Flags.setByVal();
8515 2169421 : if (Args[i].IsInAlloca) {
8516 : Flags.setInAlloca();
8517 : // Set the byval flag for CCAssignFn callbacks that don't know about
8518 : // inalloca. This way we can know how many bytes we should've allocated
8519 : // and how many bytes a callee cleanup function will pop. If we port
8520 : // inalloca to more targets, we'll have to add custom inalloca handling
8521 : // in the various CC lowering callbacks.
8522 : Flags.setByVal();
8523 : }
8524 2169421 : if (Args[i].IsByVal || Args[i].IsInAlloca) {
8525 2982 : PointerType *Ty = cast<PointerType>(Args[i].Ty);
8526 2982 : Type *ElementTy = Ty->getElementType();
8527 2982 : Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
8528 : // For ByVal, alignment should come from the FE. The BE will guess if
8529 : // this info is not there, but there are cases it cannot get right.
8530 : unsigned FrameAlign;
8531 5964 : if (Args[i].Alignment)
8532 2730 : FrameAlign = Args[i].Alignment;
8533 : else
8534 252 : FrameAlign = getByValTypeAlignment(ElementTy, DL);
8535 : Flags.setByValAlign(FrameAlign);
8536 : }
8537 4338842 : if (Args[i].IsNest)
8538 : Flags.setNest();
8539 2169421 : if (NeedsRegBlock)
8540 : Flags.setInConsecutiveRegs();
8541 : Flags.setOrigAlign(OriginalAlignment);
8542 :
8543 2169421 : MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
8544 2169421 : CLI.CallConv, VT);
8545 2169421 : unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
8546 2169421 : CLI.CallConv, VT);
8547 2169421 : SmallVector<SDValue, 4> Parts(NumParts);
8548 : ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
8549 :
8550 4338842 : if (Args[i].IsSExt)
8551 : ExtendKind = ISD::SIGN_EXTEND;
8552 2155052 : else if (Args[i].IsZExt)
8553 : ExtendKind = ISD::ZERO_EXTEND;
8554 :
8555 : // Conservatively only handle 'returned' on non-vectors that can be lowered,
8556 : // for now.
8557 2169584 : if (Args[i].IsReturned && !Op.getValueType().isVector() &&
8558 : CanLowerReturn) {
8559 : assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
8560 : "unexpected use of 'returned'");
8561 : // Before passing 'returned' to the target lowering code, ensure that
8562 : // either the register MVT and the actual EVT are the same size or that
8563 : // the return value and argument are extended in the same way; in these
8564 : // cases it's safe to pass the argument register value unchanged as the
8565 : // return register value (although it's at the target's option whether
8566 : // to do so)
8567 : // TODO: allow code generation to take advantage of partially preserved
8568 : // registers rather than clobbering the entire register when the
8569 : // parameter extension method is not compatible with the return
8570 : // extension method
8571 162 : if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
8572 14 : (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
8573 14 : CLI.RetZExt == Args[i].IsZExt))
8574 : Flags.setReturned();
8575 : }
8576 :
8577 2169421 : getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
8578 : CLI.CS.getInstruction(), CLI.CallConv, ExtendKind);
8579 :
8580 4346330 : for (unsigned j = 0; j != NumParts; ++j) {
8581 : // If it isn't the first piece, the alignment must be 1.
8582 : ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
8583 2176909 : i < CLI.NumFixedArgs,
8584 8707636 : i, j*Parts[j].getValueType().getStoreSize());
8585 2176909 : if (NumParts > 1 && j == 0)
8586 : MyFlags.Flags.setSplit();
8587 2171374 : else if (j != 0) {
8588 : MyFlags.Flags.setOrigAlign(1);
8589 7488 : if (j == NumParts - 1)
8590 : MyFlags.Flags.setSplitEnd();
8591 : }
8592 :
8593 2176909 : CLI.Outs.push_back(MyFlags);
8594 4353818 : CLI.OutVals.push_back(Parts[j]);
8595 : }
8596 :
8597 2169421 : if (NeedsRegBlock && Value == NumValues - 1)
8598 1366 : CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
8599 : }
8600 : }
8601 :
8602 : SmallVector<SDValue, 4> InVals;
8603 1005204 : CLI.Chain = LowerCall(CLI, InVals);
8604 :
8605 : // Update CLI.InVals to use outside of this function.
8606 : CLI.InVals = InVals;
8607 :
8608 : // Verify that the target's LowerCall behaved as expected.
8609 : assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
8610 : "LowerCall didn't return a valid chain!");
8611 : assert((!CLI.IsTailCall || InVals.empty()) &&
8612 : "LowerCall emitted a return value for a tail call!");
8613 : assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
8614 : "LowerCall didn't emit the correct number of values!");
8615 :
8616 : // For a tail call, the return value is merely live-out and there aren't
8617 : // any nodes in the DAG representing it. Return a special value to
8618 : // indicate that a tail call has been emitted and no more Instructions
8619 : // should be processed in the current block.
8620 1005199 : if (CLI.IsTailCall) {
8621 5531 : CLI.DAG.setRoot(CLI.Chain);
8622 5531 : return std::make_pair(SDValue(), SDValue());
8623 : }
8624 :
8625 : #ifndef NDEBUG
8626 : for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
8627 : assert(InVals[i].getNode() && "LowerCall emitted a null value!");
8628 : assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
8629 : "LowerCall emitted a value with the wrong type!");
8630 : }
8631 : #endif
8632 :
8633 : SmallVector<SDValue, 4> ReturnValues;
8634 999668 : if (!CanLowerReturn) {
8635 : // The instruction result is the result of loading from the
8636 : // hidden sret parameter.
8637 : SmallVector<EVT, 1> PVTs;
8638 209 : Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
8639 :
8640 209 : ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
8641 : assert(PVTs.size() == 1 && "Pointers should fit in one register");
8642 209 : EVT PtrVT = PVTs[0];
8643 :
8644 209 : unsigned NumValues = RetTys.size();
8645 209 : ReturnValues.resize(NumValues);
8646 209 : SmallVector<SDValue, 4> Chains(NumValues);
8647 :
8648 : // An aggregate return value cannot wrap around the address space, so
8649 : // offsets to its parts don't wrap either.
8650 : SDNodeFlags Flags;
8651 : Flags.setNoUnsignedWrap(true);
8652 :
8653 588 : for (unsigned i = 0; i < NumValues; ++i) {
8654 379 : SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
8655 379 : CLI.DAG.getConstant(Offsets[i], CLI.DL,
8656 379 : PtrVT), Flags);
8657 379 : SDValue L = CLI.DAG.getLoad(
8658 : RetTys[i], CLI.DL, CLI.Chain, Add,
8659 : MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
8660 : DemoteStackIdx, Offsets[i]),
8661 379 : /* Alignment = */ 1);
8662 379 : ReturnValues[i] = L;
8663 758 : Chains[i] = L.getValue(1);
8664 : }
8665 :
8666 418 : CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
8667 : } else {
8668 : // Collect the legal value parts into potentially illegal values
8669 : // that correspond to the original function's return values.
8670 : Optional<ISD::NodeType> AssertOp;
8671 999459 : if (CLI.RetSExt)
8672 : AssertOp = ISD::AssertSext;
8673 996168 : else if (CLI.RetZExt)
8674 : AssertOp = ISD::AssertZext;
8675 : unsigned CurReg = 0;
8676 1406229 : for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
8677 406770 : EVT VT = RetTys[I];
8678 406770 : MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
8679 406770 : CLI.CallConv, VT);
8680 406770 : unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
8681 406770 : CLI.CallConv, VT);
8682 :
8683 406770 : ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
8684 : NumRegs, RegisterVT, VT, nullptr,
8685 406770 : CLI.CallConv, AssertOp));
8686 406770 : CurReg += NumRegs;
8687 : }
8688 :
8689 : // For a function returning void, there is no return value. We can't create
8690 : // such a node, so we just return a null return value in that case. In
8691 : // that case, nothing will actually look at the value.
8692 999459 : if (ReturnValues.empty())
8693 : return std::make_pair(SDValue(), CLI.Chain);
8694 : }
8695 :
8696 402849 : SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
8697 402849 : CLI.DAG.getVTList(RetTys), ReturnValues);
8698 402849 : return std::make_pair(Res, CLI.Chain);
8699 : }
8700 :
8701 2561 : void TargetLowering::LowerOperationWrapper(SDNode *N,
8702 : SmallVectorImpl<SDValue> &Results,
8703 : SelectionDAG &DAG) const {
8704 5122 : if (SDValue Res = LowerOperation(SDValue(N, 0), DAG))
8705 2413 : Results.push_back(Res);
8706 2561 : }
8707 :
8708 0 : SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
8709 0 : llvm_unreachable("LowerOperation not implemented for this target!");
8710 : }
8711 :
8712 : void
8713 856920 : SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
8714 856920 : SDValue Op = getNonRegisterValue(V);
8715 : assert((Op.getOpcode() != ISD::CopyFromReg ||
8716 : cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
8717 : "Copy from a reg to the same reg!");
8718 : assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
8719 :
8720 856920 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8721 : // If this is an InlineAsm we have to match the registers required, not the
8722 : // notional registers required by the type.
8723 :
8724 : RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
8725 1713840 : None); // This is not an ABI copy.
8726 856920 : SDValue Chain = DAG.getEntryNode();
8727 :
8728 856920 : ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
8729 856920 : FuncInfo.PreferredExtendType.end())
8730 1553899 : ? ISD::ANY_EXTEND
8731 696979 : : FuncInfo.PreferredExtendType[V];
8732 1553820 : RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
8733 856920 : PendingExports.push_back(Chain);
8734 856920 : }
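 :
 : // Note that the copy is parked on PendingExports rather than merged into
 : // the root immediately; it is folded into the chain when the block's
 : // exports are flushed (see getControlRoot).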
8735 :
8736 : #include "llvm/CodeGen/SelectionDAGISel.h"
8737 :
8738 : /// isOnlyUsedInEntryBlock - If the specified argument is only used in the
8739 : /// entry block, return true. This includes arguments used by switches, since
8740 : /// the switch may expand into multiple basic blocks.
8741 130481 : static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
8742 : // With FastISel active, we may be splitting blocks, so force creation
8743 : // of virtual registers for all non-dead arguments.
8744 130481 : if (FastISel)
8745 108252 : return A->use_empty();
8746 :
8747 76355 : const BasicBlock &Entry = A->getParent()->front();
8748 161767 : for (const User *U : A->users())
8749 90306 : if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
8750 : return false; // Use not in entry block.
8751 :
8752 : return true;
8753 : }
8754 :
8755 : using ArgCopyElisionMapTy =
8756 : DenseMap<const Argument *,
8757 : std::pair<const AllocaInst *, const StoreInst *>>;
8758 :
8759 : /// Scan the entry block of the function in FuncInfo for arguments that look
8760 : /// like copies into a local alloca. Record any copied arguments in
8761 : /// ArgCopyElisionCandidates.
8762 : static void
8763 210490 : findArgumentCopyElisionCandidates(const DataLayout &DL,
8764 : FunctionLoweringInfo *FuncInfo,
8765 : ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
8766 : // Record the state of every static alloca used in the entry block. Argument
8767 : // allocas are all used in the entry block, so we need approximately as many
8768 : // entries as we have arguments.
8769 : enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
8770 : SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
8771 210490 : unsigned NumArgs = FuncInfo->Fn->arg_size();
8772 210490 : StaticAllocas.reserve(NumArgs * 2);
8773 :
8774 : auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
8775 : if (!V)
8776 : return nullptr;
8777 : V = V->stripPointerCasts();
8778 : const auto *AI = dyn_cast<AllocaInst>(V);
8779 : if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
8780 : return nullptr;
8781 : auto Iter = StaticAllocas.insert({AI, Unknown});
8782 : return &Iter.first->second;
8783 210490 : };
8784 :
8785 : // Look for stores of arguments to static allocas. Look through bitcasts and
8786 : // GEPs to handle type coercions, as long as the alloca is fully initialized
8787 : // by the store. Any non-store use of an alloca escapes it and any subsequent
8788 : // unanalyzed store might write it.
8789 : // FIXME: Handle structs initialized with multiple stores.
8790 2393794 : for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
8791 : // Look for stores, and handle non-store uses conservatively.
8792 : const auto *SI = dyn_cast<StoreInst>(&I);
8793 : if (!SI) {
8794 : // We will look through cast uses, so ignore them completely.
8795 1831192 : if (I.isCast())
8796 : continue;
8797 : // Ignore debug info intrinsics, they don't escape or store to allocas.
8798 : if (isa<DbgInfoIntrinsic>(I))
8799 : continue;
8800 : // This is an unknown instruction. Assume it escapes or writes to all
8801 : // static alloca operands.
8802 5735881 : for (const Use &U : I.operands()) {
8803 2503151 : if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
8804 71166 : *Info = StaticAllocaInfo::Clobbered;
8805 : }
8806 : continue;
8807 : }
8808 :
8809 : // If the stored value is a static alloca, mark it as escaped.
8810 147582 : if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
8811 4169 : *Info = StaticAllocaInfo::Clobbered;
8812 :
8813 : // Check if the destination is a static alloca.
8814 147582 : const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
8815 147582 : StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
8816 147582 : if (!Info)
8817 : continue;
8818 : const AllocaInst *AI = cast<AllocaInst>(Dst);
8819 :
8820 : // Skip allocas that have been initialized or clobbered.
8821 72050 : if (*Info != StaticAllocaInfo::Unknown)
8822 : continue;
8823 :
8824 : // Check if the stored value is an argument, and that this store fully
8825 : // initializes the alloca. Don't elide copies from the same argument twice.
8826 66794 : const Value *Val = SI->getValueOperand()->stripPointerCasts();
8827 : const auto *Arg = dyn_cast<Argument>(Val);
8828 55759 : if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
8829 55705 : Arg->getType()->isEmptyTy() ||
8830 27852 : DL.getTypeStoreSize(Arg->getType()) !=
8831 27852 : DL.getTypeAllocSize(AI->getAllocatedType()) ||
8832 8 : ArgCopyElisionCandidates.count(Arg)) {
8833 39096 : *Info = StaticAllocaInfo::Clobbered;
8834 39096 : continue;
8835 : }
8836 :
8837 : LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
8838 : << '\n');
8839 :
8840 : // Mark this alloca and store for argument copy elision.
8841 27698 : *Info = StaticAllocaInfo::Elidable;
8842 27698 : ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
8843 :
8844 : // Stop scanning if we've seen all arguments. This will happen early in -O0
8845 : // builds, which is useful, because -O0 builds have large entry blocks and
8846 : // many allocas.
8847 27698 : if (ArgCopyElisionCandidates.size() == NumArgs)
8848 : break;
8849 : }
8850 210490 : }
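 :
 : // The pattern recognized above, sketched in IR form:
 : //   define void @f(i64 %x) {
 : //   entry:
 : //     %a = alloca i64         ; static alloca...
 : //     store i64 %x, i64* %a   ; ...fully initialized by this store
 : // makes {%a, store} an elision candidate for %x, provided no earlier
 : // instruction clobbered or escaped %a.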
8851 :
8852 : /// Try to elide argument copies from memory into a local alloca. Succeeds if
8853 : /// ArgVal is a load from a suitable fixed stack object.
8854 0 : static void tryToElideArgumentCopy(
8855 : FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
8856 : DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
8857 : SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
8858 : ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
8859 : SDValue ArgVal, bool &ArgHasUses) {
8860 : // Check if this is a load from a fixed stack object.
8861 : auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
8862 : if (!LNode)
8863 0 : return;
8864 0 : auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
8865 : if (!FINode)
8866 0 : return;
8867 :
8868 : // Check that the fixed stack object is the right size and alignment.
8869 : // Look at the alignment that the user wrote on the alloca instead of looking
8870 : // at the stack object.
8871 0 : auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
8872 : assert(ArgCopyIter != ArgCopyElisionCandidates.end());
8873 0 : const AllocaInst *AI = ArgCopyIter->second.first;
8874 0 : int FixedIndex = FINode->getIndex();
8875 0 : int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
8876 0 : int OldIndex = AllocaIndex;
8877 0 : MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
8878 0 : if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
8879 : LLVM_DEBUG(
8880 : dbgs() << " argument copy elision failed due to bad fixed stack "
8881 : "object size\n");
8882 0 : return;
8883 : }
8884 0 : unsigned RequiredAlignment = AI->getAlignment();
8885 0 : if (!RequiredAlignment) {
8886 0 : RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
8887 : AI->getAllocatedType());
8888 : }
8889 0 : if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
8890 : LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
8891 : "greater than stack argument alignment ("
8892 : << RequiredAlignment << " vs "
8893 : << MFI.getObjectAlignment(FixedIndex) << ")\n");
8894 0 : return;
8895 : }
8896 :
8897 : // Perform the elision. Delete the old stack object and replace its only use
8898 : // in the variable info map. Mark the stack object as mutable.
8899 : LLVM_DEBUG({
8900 : dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
8901 : << " Replacing frame index " << OldIndex << " with " << FixedIndex
8902 : << '\n';
8903 : });
8904 : MFI.RemoveStackObject(OldIndex);
8905 : MFI.setIsImmutableObjectIndex(FixedIndex, false);
8906 0 : AllocaIndex = FixedIndex;
8907 0 : ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
8908 0 : Chains.push_back(ArgVal.getValue(1));
8909 :
8910 : // Avoid emitting code for the store implementing the copy.
8911 0 : const StoreInst *SI = ArgCopyIter->second.second;
8912 0 : ElidedArgCopyInstrs.insert(SI);
8913 :
8914 : // Check for uses of the argument again so that we can avoid exporting ArgVal
8915 : // if it isn't used by anything other than the store.
8916 0 : for (const Value *U : Arg.users()) {
8917 0 : if (U != SI) {
8918 0 : ArgHasUses = true;
8919 0 : break;
8920 : }
8921 : }
8922 : }
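 :
 : // The net effect of a successful elision, roughly: if i64 %x arrives in a
 : // fixed stack slot and the entry block copies it into a same-sized alloca,
 : // the alloca's frame index is redirected to the incoming-argument slot
 : // (now marked mutable) and the copying store is never emitted.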
8923 :
8924 210490 : void SelectionDAGISel::LowerArguments(const Function &F) {
8925 210490 : SelectionDAG &DAG = SDB->DAG;
8926 210490 : SDLoc dl = SDB->getCurSDLoc();
8927 210490 : const DataLayout &DL = DAG.getDataLayout();
8928 : SmallVector<ISD::InputArg, 16> Ins;
8929 :
8930 210490 : if (!FuncInfo->CanLowerReturn) {
8931 : // Put in an sret pointer parameter before all the other parameters.
8932 : SmallVector<EVT, 1> ValueVTs;
8933 1354 : ComputeValueVTs(*TLI, DAG.getDataLayout(),
8934 1354 : F.getReturnType()->getPointerTo(
8935 1354 : DAG.getDataLayout().getAllocaAddrSpace()),
8936 : ValueVTs);
8937 :
8938 : // NOTE: We assume that a pointer will never break down into more than
8939 : // one VT, or more than one register.
8940 : ISD::ArgFlagsTy Flags;
8941 : Flags.setSRet();
8942 2708 : MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
8943 : ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
8944 : ISD::InputArg::NoArgIndex, 0);
8945 1354 : Ins.push_back(RetArg);
8946 : }
8947 :
8948 : // Look for stores of arguments to static allocas. Mark such arguments with a
8949 : // flag to ask the target to give us the memory location of that argument if
8950 : // available.
8951 : ArgCopyElisionMapTy ArgCopyElisionCandidates;
8952 210490 : findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
8953 :
8954 : // Set up the incoming argument description vector.
8955 610963 : for (const Argument &Arg : F.args()) {
8956 400473 : unsigned ArgNo = Arg.getArgNo();
8957 : SmallVector<EVT, 4> ValueVTs;
8958 400473 : ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
8959 400473 : bool isArgValueUsed = !Arg.use_empty();
8960 : unsigned PartBase = 0;
8961 400473 : Type *FinalType = Arg.getType();
8962 400473 : if (Arg.hasAttribute(Attribute::ByVal))
8963 1225 : FinalType = cast<PointerType>(FinalType)->getElementType();
8964 800946 : bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
8965 400473 : FinalType, F.getCallingConv(), F.isVarArg());
8966 400473 : for (unsigned Value = 0, NumValues = ValueVTs.size();
8967 807998 : Value != NumValues; ++Value) {
8968 407525 : EVT VT = ValueVTs[Value];
8969 407525 : Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
8970 : ISD::ArgFlagsTy Flags;
8971 :
8972 : // Certain targets (such as MIPS) may have a different ABI alignment
8973 : // for a type depending on the context. Give the target a chance to
8974 : // specify the alignment it wants.
8975 : unsigned OriginalAlignment =
8976 407526 : TLI->getABIAlignmentForCallingConv(ArgTy, DL);
8977 :
8978 407526 : if (Arg.hasAttribute(Attribute::ZExt))
8979 : Flags.setZExt();
8980 407526 : if (Arg.hasAttribute(Attribute::SExt))
8981 : Flags.setSExt();
8982 407526 : if (Arg.hasAttribute(Attribute::InReg)) {
8983 : // If we are using the vectorcall calling convention, a structure that
8984 : // is passed InReg is surely an HVA.
8985 3013 : if (F.getCallingConv() == CallingConv::X86_VectorCall &&
8986 74 : isa<StructType>(Arg.getType())) {
8987 : // The first value of the structure is marked as HvaStart.
8988 50 : if (0 == Value)
8989 : Flags.setHvaStart();
8990 : Flags.setHva();
8991 : }
8992 : // Set InReg Flag
8993 : Flags.setInReg();
8994 : }
8995 407526 : if (Arg.hasAttribute(Attribute::StructRet))
8996 : Flags.setSRet();
8997 407526 : if (Arg.hasAttribute(Attribute::SwiftSelf))
8998 : Flags.setSwiftSelf();
8999 407526 : if (Arg.hasAttribute(Attribute::SwiftError))
9000 : Flags.setSwiftError();
9001 407526 : if (Arg.hasAttribute(Attribute::ByVal))
9002 : Flags.setByVal();
9003 407526 : if (Arg.hasAttribute(Attribute::InAlloca)) {
9004 : Flags.setInAlloca();
9005 : // Set the byval flag for CCAssignFn callbacks that don't know about
9006 : // inalloca. This way we can know how many bytes we should've allocated
9007 : // and how many bytes a callee cleanup function will pop. If we port
9008 : // inalloca to more targets, we'll have to add custom inalloca handling
9009 : // in the various CC lowering callbacks.
9010 : Flags.setByVal();
9011 : }
9012 407526 : if (F.getCallingConv() == CallingConv::X86_INTR) {
9013 : // An IA interrupt passes the frame (its first parameter) by value on the stack.
9014 36 : if (ArgNo == 0)
9015 : Flags.setByVal();
9016 : }
9017 407526 : if (Flags.isByVal() || Flags.isInAlloca()) {
9018 1270 : PointerType *Ty = cast<PointerType>(Arg.getType());
9019 1270 : Type *ElementTy = Ty->getElementType();
9020 1270 : Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
9021 : // For ByVal, alignment should be passed from the FE. The BE will guess
9022 : // if this info is not there, but there are cases it cannot get right.
9023 : unsigned FrameAlign;
9024 1270 : if (Arg.getParamAlignment())
9025 741 : FrameAlign = Arg.getParamAlignment();
9026 : else
9027 529 : FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
9028 : Flags.setByValAlign(FrameAlign);
9029 : }
9030 407526 : if (Arg.hasAttribute(Attribute::Nest))
9031 : Flags.setNest();
9032 407526 : if (NeedsRegBlock)
9033 : Flags.setInConsecutiveRegs();
9034 : Flags.setOrigAlign(OriginalAlignment);
9035 407526 : if (ArgCopyElisionCandidates.count(&Arg))
9036 : Flags.setCopyElisionCandidate();
9037 :
9038 407526 : MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
9039 815052 : *CurDAG->getContext(), F.getCallingConv(), VT);
9040 815052 : unsigned NumRegs = TLI->getNumRegistersForCallingConv(
9041 815052 : *CurDAG->getContext(), F.getCallingConv(), VT);
9042 857523 : for (unsigned i = 0; i != NumRegs; ++i) {
9043 : ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
9044 449998 : ArgNo, PartBase+i*RegisterVT.getStoreSize());
9045 449998 : if (NumRegs > 1 && i == 0)
9046 : MyFlags.Flags.setSplit();
9047 : // If it isn't the first piece, the alignment must be 1.
9048 431065 : else if (i > 0) {
9049 : MyFlags.Flags.setOrigAlign(1);
9050 42472 : if (i == NumRegs - 1)
9051 : MyFlags.Flags.setSplitEnd();
9052 : }
9053 449998 : Ins.push_back(MyFlags);
9054 : }
9055 407525 : if (NeedsRegBlock && Value == NumValues - 1)
9056 5618 : Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
9057 407525 : PartBase += VT.getStoreSize();
9058 : }
9059 : }
9060 :
9061 : // Call the target to set up the argument values.
9062 : SmallVector<SDValue, 8> InVals;
9063 210490 : SDValue NewRoot = TLI->LowerFormalArguments(
9064 631470 : DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
9065 :
9066 : // Verify that the target's LowerFormalArguments behaved as expected.
9067 : assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
9068 : "LowerFormalArguments didn't return a valid chain!");
9069 : assert(InVals.size() == Ins.size() &&
9070 : "LowerFormalArguments didn't emit the correct number of values!");
9071 : LLVM_DEBUG({
9072 : for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
9073 : assert(InVals[i].getNode() &&
9074 : "LowerFormalArguments emitted a null value!");
9075 : assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
9076 : "LowerFormalArguments emitted a value with the wrong type!");
9077 : }
9078 : });
9079 :
9080 : // Update the DAG with the new chain value resulting from argument lowering.
9081 210486 : DAG.setRoot(NewRoot);
9082 :
9083 : // Set up the argument values.
9084 : unsigned i = 0;
9085 210486 : if (!FuncInfo->CanLowerReturn) {
9086 : // Create a virtual register for the sret pointer, and put in a copy
9087 : // from the sret argument into it.
9088 : SmallVector<EVT, 1> ValueVTs;
9089 1354 : ComputeValueVTs(*TLI, DAG.getDataLayout(),
9090 1354 : F.getReturnType()->getPointerTo(
9091 1354 : DAG.getDataLayout().getAllocaAddrSpace()),
9092 : ValueVTs);
9093 : MVT VT = ValueVTs[0].getSimpleVT();
9094 2708 : MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
9095 : Optional<ISD::NodeType> AssertOp = None;
9096 : SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
9097 1354 : nullptr, F.getCallingConv(), AssertOp);
9098 :
9099 1354 : MachineFunction& MF = SDB->DAG.getMachineFunction();
9100 1354 : MachineRegisterInfo& RegInfo = MF.getRegInfo();
9101 1354 : unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
9102 1354 : FuncInfo->DemoteRegister = SRetReg;
9103 1354 : NewRoot =
9104 1354 : SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
9105 1354 : DAG.setRoot(NewRoot);
9106 :
9107 : // i indexes lowered arguments. Bump it past the hidden sret argument.
9108 : ++i;
9109 : }
9110 :
9111 : SmallVector<SDValue, 4> Chains;
9112 : DenseMap<int, int> ArgCopyElisionFrameIndexMap;
9113 610954 : for (const Argument &Arg : F.args()) {
9114 : SmallVector<SDValue, 4> ArgValues;
9115 : SmallVector<EVT, 4> ValueVTs;
9116 400468 : ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
9117 400468 : unsigned NumValues = ValueVTs.size();
9118 400468 : if (NumValues == 0)
9119 : continue;
9120 :
9121 400444 : bool ArgHasUses = !Arg.use_empty();
9122 :
9123 : // Elide the copying store if the target loaded this argument from a
9124 : // suitable fixed stack object.
9125 800888 : if (Ins[i].Flags.isCopyElisionCandidate()) {
9126 27698 : tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
9127 : ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
9128 : InVals[i], ArgHasUses);
9129 : }
9130 :
9131 : // If this argument is unused, remember its value; it is used to generate
9132 : // debugging information.
9133 : bool isSwiftErrorArg =
9134 656767 : TLI->supportSwiftError() &&
9135 256323 : Arg.hasAttribute(Attribute::SwiftError);
9136 400444 : if (!ArgHasUses && !isSwiftErrorArg) {
9137 99122 : SDB->setUnusedArgValue(&Arg, InVals[i]);
9138 :
9139 : // Also remember any frame index for use in FastISel.
9140 : if (FrameIndexSDNode *FI =
9141 49561 : dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
9142 81 : FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
9143 : }
9144 :
9145 807965 : for (unsigned Val = 0; Val != NumValues; ++Val) {
9146 407521 : EVT VT = ValueVTs[Val];
9147 815042 : MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
9148 815042 : F.getCallingConv(), VT);
9149 815042 : unsigned NumParts = TLI->getNumRegistersForCallingConv(
9150 815042 : *CurDAG->getContext(), F.getCallingConv(), VT);
9151 :
9152 : // Even an apparent 'unused' swifterror argument needs to be returned. So
9153 : // we do generate a copy for it that can be used on return from the
9154 : // function.
9155 407521 : if (ArgHasUses || isSwiftErrorArg) {
9156 : Optional<ISD::NodeType> AssertOp;
9157 352091 : if (Arg.hasAttribute(Attribute::SExt))
9158 : AssertOp = ISD::AssertSext;
9159 341869 : else if (Arg.hasAttribute(Attribute::ZExt))
9160 : AssertOp = ISD::AssertZext;
9161 :
9162 352091 : ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
9163 : PartVT, VT, nullptr,
9164 704182 : F.getCallingConv(), AssertOp));
9165 : }
9166 :
9167 407521 : i += NumParts;
9168 : }
9169 :
9170 : // We don't need to do anything else for unused arguments.
9171 400444 : if (ArgValues.empty())
9172 : continue;
9173 :
9174 : // Note down frame index.
9175 : if (FrameIndexSDNode *FI =
9176 350883 : dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
9177 898 : FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
9178 :
9179 350883 : SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
9180 350883 : SDB->getCurSDLoc());
9181 :
9182 350883 : SDB->setValue(&Arg, Res);
9183 350883 : if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
9184 : // We want to associate the argument with the frame index, among the
9185 : // involved operands, that corresponds to the lowest address. The
9186 : // getCopyFromParts function, called earlier, is swapping the order of
9187 : // the operands to BUILD_PAIR depending on endianness. The result of
9188 : // that swapping is that the least significant bits of the argument will
9189 : // be in the first operand of the BUILD_PAIR node, and the most
9190 : // significant bits will be in the second operand.
9191 4618 : unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9192 : if (LoadSDNode *LNode =
9193 4618 : dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
9194 : if (FrameIndexSDNode *FI =
9195 1892 : dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
9196 1607 : FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
9197 : }
9198 :
9199 : // Update the SwiftErrorVRegDefMap.
9200 350883 : if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
9201 103 : unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
9202 103 : if (TargetRegisterInfo::isVirtualRegister(Reg))
9203 103 : FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB,
9204 : FuncInfo->SwiftErrorArg, Reg);
9205 : }
9206 :
9207 : // If this argument is live outside of the entry block, insert a copy from
9208 : // wherever we got it to the vreg that other BB's will reference it as.
9209 350883 : if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
9210 : // If we can, though, try to skip creating an unnecessary vreg.
9211 : // FIXME: This isn't very clean... it would be nice to make this more
9212 : // general. It's also subtly incompatible with the hacks FastISel
9213 : // uses with vregs.
9214 220402 : unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
9215 220402 : if (TargetRegisterInfo::isVirtualRegister(Reg)) {
9216 220402 : FuncInfo->ValueMap[&Arg] = Reg;
9217 220402 : continue;
9218 : }
9219 : }
9220 130481 : if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
9221 59013 : FuncInfo->InitializeRegForValue(&Arg);
9222 59013 : SDB->CopyToExportRegsIfNeeded(&Arg);
9223 : }
9224 : }
9225 :
9226 210486 : if (!Chains.empty()) {
9227 696 : Chains.push_back(NewRoot);
9228 696 : NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
9229 : }
9230 :
9231 210486 : DAG.setRoot(NewRoot);
9232 :
9233 : assert(i == InVals.size() && "Argument register count mismatch!");
9234 :
9235 : // If any argument copy elisions occurred and we have debug info, update the
9236 : // stale frame indices used in the dbg.declare variable info table.
9237 210486 : MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
9238 210486 : if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
9239 0 : for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
9240 0 : auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
9241 0 : if (I != ArgCopyElisionFrameIndexMap.end())
9242 0 : VI.Slot = I->second;
9243 : }
9244 : }
9245 :
9246 : // Finally, if the target has anything special to do, allow it to do so.
9247 210486 : EmitFunctionEntryCode();
9248 210486 : }
9249 :
9250 : /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
9251 : /// ensure constants are generated when needed. Remember the virtual registers
9252 : /// that need to be added to the Machine PHI nodes as input. We cannot just
9253 : /// directly add them, because expansion might result in multiple MBB's for one
9254 : /// BB. As such, the start of the BB might correspond to a different MBB than
9255 : /// the end.
9256 : void
9257 889932 : SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
9258 889932 : const Instruction *TI = LLVMBB->getTerminator();
9259 :
9260 : SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
9261 :
9262 : // Check PHI nodes in successors that expect a value to be available from this
9263 : // block.
9264 2167167 : for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
9265 1277235 : const BasicBlock *SuccBB = TI->getSuccessor(succ);
9266 1277349 : if (!isa<PHINode>(SuccBB->begin())) continue;
9267 132636 : MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
9268 :
9269 : // If this terminator has multiple identical successors (common for
9270 : // switches), only handle each succ once.
9271 132636 : if (!SuccsHandled.insert(SuccMBB).second)
9272 : continue;
9273 :
9274 : MachineBasicBlock::iterator MBBI = SuccMBB->begin();
9275 :
9276 : // At this point we know that there is a 1-1 correspondence between LLVM PHI
9277 : // nodes and Machine PHI nodes, but the incoming operands have not been
9278 : // emitted yet.
9279 486947 : for (const PHINode &PN : SuccBB->phis()) {
9280 : // Ignore dead PHI nodes.
9281 221903 : if (PN.use_empty())
9282 4418 : continue;
9283 :
9284 : // Skip empty types
9285 217489 : if (PN.getType()->isEmptyTy())
9286 : continue;
9287 :
9288 : unsigned Reg;
9289 217485 : const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
9290 :
9291 217485 : if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
9292 103712 : unsigned &RegOut = ConstantsOut[C];
9293 103712 : if (RegOut == 0) {
9294 100922 : RegOut = FuncInfo.CreateRegs(C->getType());
9295 100922 : CopyValueToVirtualRegister(C, RegOut);
9296 : }
9297 103712 : Reg = RegOut;
9298 : } else {
9299 : DenseMap<const Value *, unsigned>::iterator I =
9300 113773 : FuncInfo.ValueMap.find(PHIOp);
9301 227546 : if (I != FuncInfo.ValueMap.end())
9302 113688 : Reg = I->second;
9303 : else {
9304 : assert(isa<AllocaInst>(PHIOp) &&
9305 : FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
9306 : "Didn't codegen value into a register!??");
9307 85 : Reg = FuncInfo.CreateRegs(PHIOp->getType());
9308 85 : CopyValueToVirtualRegister(PHIOp, Reg);
9309 : }
9310 : }
9311 :
9312 : // Remember that this register needs to be added to the machine PHI node as
9313 : // the input for this MBB.
9314 : SmallVector<EVT, 4> ValueVTs;
9315 217485 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9316 217485 : ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
9317 450245 : for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
9318 232760 : EVT VT = ValueVTs[vti];
9319 232760 : unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
9320 467662 : for (unsigned i = 0, e = NumRegisters; i != e; ++i)
9321 234902 : FuncInfo.PHINodesToUpdate.push_back(
9322 469804 : std::make_pair(&*MBBI++, Reg + i));
9323 232760 : Reg += NumRegisters;
9324 : }
9325 : }
9326 : }
9327 :
9328 889932 : ConstantsOut.clear();
9329 889931 : }
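// Editorial sketch (not part of the original source): for a PHI of type
// {i64, i64} on a hypothetical 32-bit target, ComputeValueVTs yields two
// i64 parts and getNumRegisters expands each part into two i32 registers,
// so the inner loops above append four consecutive (machine PHI, vreg)
// pairs to PHINodesToUpdate, one per machine PHI operand slot.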
9330 :
9331 : /// Add a successor MBB to ParentMBB, creating a new MachineBasicBlock for BB
9332 : /// if SuccMBB is null.
9333 : MachineBasicBlock *
9334 516 : SelectionDAGBuilder::StackProtectorDescriptor::
9335 : AddSuccessorMBB(const BasicBlock *BB,
9336 : MachineBasicBlock *ParentMBB,
9337 : bool IsLikely,
9338 : MachineBasicBlock *SuccMBB) {
9339 : // If SuccMBB has not been created yet, create it.
9340 516 : if (!SuccMBB) {
9341 507 : MachineFunction *MF = ParentMBB->getParent();
9342 : MachineFunction::iterator BBI(ParentMBB);
9343 507 : SuccMBB = MF->CreateMachineBasicBlock(BB);
9344 : MF->insert(++BBI, SuccMBB);
9345 : }
9346 : // Add it as a successor of ParentMBB.
9347 516 : ParentMBB->addSuccessor(
9348 : SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
9349 516 : return SuccMBB;
9350 : }
9351 :
9352 181805 : MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
9353 : MachineFunction::iterator I(MBB);
9354 363610 : if (++I == FuncInfo.MF->end())
9355 700 : return nullptr;
9356 : return &*I;
9357 : }
9358 :
9359 : /// During lowering, new call nodes can be created (such as calls to memset).
9360 : /// Those will become new roots of the current DAG, but complications arise
9361 : /// when they are tail calls. In such cases, the call lowering will update
9362 : /// the root, but the builder still needs to know that a tail call has been
9363 : /// lowered in order to avoid generating an additional return.
9364 258691 : void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
9365 : // If the node is null, we do have a tail call.
9366 258691 : if (MaybeTC.getNode() != nullptr)
9367 258656 : DAG.setRoot(MaybeTC);
9368 : else
9369 35 : HasTailCall = true;
9370 258691 : }
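// Usage note (editorial): lowering helpers that may emit a library call
// (memcpy/memset and the like) return a null SDValue when the operation was
// folded into a tail call; routing their result through
// updateDAGForMaybeTailCall lets the builder suppress the redundant return
// it would otherwise emit for the block.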
9371 :
9372 : uint64_t
9373 6624 : SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters,
9374 : unsigned First, unsigned Last) const {
9375 : assert(Last >= First);
9376 13248 : const APInt &LowCase = Clusters[First].Low->getValue();
9377 13248 : const APInt &HighCase = Clusters[Last].High->getValue();
9378 : assert(LowCase.getBitWidth() == HighCase.getBitWidth());
9379 :
9380 : // FIXME: A range of consecutive cases has 100% density, but only requires one
9381 : // comparison to lower. We should discriminate against such consecutive ranges
9382 : // in jump tables.
9383 :
9384 6624 : return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
9385 : }
9386 :
9387 6624 : uint64_t SelectionDAGBuilder::getJumpTableNumCases(
9388 : const SmallVectorImpl<unsigned> &TotalCases, unsigned First,
9389 : unsigned Last) const {
9390 : assert(Last >= First);
9391 : assert(TotalCases[Last] >= TotalCases[First]);
9392 : uint64_t NumCases =
9393 13248 : TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
9394 6624 : return NumCases;
9395 : }
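// Editorial note (not part of the original source): these two helpers feed
// the jump-table density heuristic. For a hypothetical cluster vector of
// single-value clusters covering the case values {0, 1, 2, 100}:
//
//   uint64_t Range    = getJumpTableRange(Clusters, 0, 3);      // 100 - 0 + 1 = 101
//   uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, 3); // 4
//
// so TLI.isSuitableForJumpTable sees 4 handled values spread over 101 table
// slots and can reject the partition as too sparse.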
9396 :
9397 3581 : bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
9398 : unsigned First, unsigned Last,
9399 : const SwitchInst *SI,
9400 : MachineBasicBlock *DefaultMBB,
9401 : CaseCluster &JTCluster) {
9402 : assert(First <= Last);
9403 :
9404 : auto Prob = BranchProbability::getZero();
9405 : unsigned NumCmps = 0;
9406 : std::vector<MachineBasicBlock*> Table;
9407 : DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
9408 :
9409 : // Initialize probabilities in JTProbs.
9410 24829 : for (unsigned I = First; I <= Last; ++I)
9411 42496 : JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
9412 :
9413 24829 : for (unsigned I = First; I <= Last; ++I) {
9414 : assert(Clusters[I].Kind == CC_Range);
9415 21248 : Prob += Clusters[I].Prob;
9416 21248 : const APInt &Low = Clusters[I].Low->getValue();
9417 21248 : const APInt &High = Clusters[I].High->getValue();
9418 21248 : NumCmps += (Low == High) ? 1 : 2;
9419 21248 : if (I != First) {
9420 : // Fill the gap between this and the previous cluster.
9421 35334 : const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
9422 : assert(PreviousHigh.slt(Low));
9423 17667 : uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
9424 70753 : for (uint64_t J = 0; J < Gap; J++)
9425 53086 : Table.push_back(DefaultMBB);
9426 : }
9427 21248 : uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
9428 47282 : for (uint64_t J = 0; J < ClusterSize; ++J)
9429 52068 : Table.push_back(Clusters[I].MBB);
9430 42496 : JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
9431 : }
9432 :
9433 3581 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9434 : unsigned NumDests = JTProbs.size();
9435 3581 : if (TLI.isSuitableForBitTests(
9436 7162 : NumDests, NumCmps, Clusters[First].Low->getValue(),
9437 7162 : Clusters[Last].High->getValue(), DAG.getDataLayout())) {
9438 : // Clusters[First..Last] should be lowered as bit tests instead.
9439 : return false;
9440 : }
9441 :
9442 : // Create the MBB that will load from and jump through the table.
9443 : // Note: We create it here, but it's not inserted into the function yet.
9444 3199 : MachineFunction *CurMF = FuncInfo.MF;
9445 : MachineBasicBlock *JumpTableMBB =
9446 3199 : CurMF->CreateMachineBasicBlock(SI->getParent());
9447 :
9448 : // Add successors. Note: use table order for determinism.
9449 : SmallPtrSet<MachineBasicBlock *, 8> Done;
9450 75186 : for (MachineBasicBlock *Succ : Table) {
9451 71987 : if (Done.count(Succ))
9452 : continue;
9453 17567 : addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
9454 17567 : Done.insert(Succ);
9455 : }
9456 : JumpTableMBB->normalizeSuccProbs();
9457 :
9458 3199 : unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
9459 3199 : ->createJumpTableIndex(Table);
9460 :
9461 : // Set up the jump table info.
9462 : JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
9463 6398 : JumpTableHeader JTH(Clusters[First].Low->getValue(),
9464 6398 : Clusters[Last].High->getValue(), SI->getCondition(),
9465 6398 : nullptr, false);
9466 3199 : JTCases.emplace_back(std::move(JTH), std::move(JT));
9467 :
9468 6398 : JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
9469 6398 : JTCases.size() - 1, Prob);
9470 : return true;
9471 : }
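// Illustrative sketch (editorial): for clusters {1 -> BB_A, 2 -> BB_A,
// 5 -> BB_B}, the table-building loop above fills the hole between 2 and 5
// with the default destination, producing a dense table indexed by
// (value - Low):
//
//   Table = { BB_A, BB_A, Def, Def, BB_B };   // values 1, 2, 3, 4, 5
//
// JTProbs accumulates per-destination probability so each successor edge of
// JumpTableMBB carries the summed weight of every slot that targets it.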
9472 :
9473 11419 : void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
9474 : const SwitchInst *SI,
9475 : MachineBasicBlock *DefaultMBB) {
9476 : #ifndef NDEBUG
9477 : // Clusters must be non-empty, sorted, and only contain Range clusters.
9478 : assert(!Clusters.empty());
9479 : for (CaseCluster &C : Clusters)
9480 : assert(C.Kind == CC_Range);
9481 : for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
9482 : assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
9483 : #endif
9484 :
9485 11419 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9486 11419 : if (!TLI.areJTsAllowed(SI->getParent()->getParent()))
9487 11337 : return;
9488 :
9489 11403 : const int64_t N = Clusters.size();
9490 11403 : const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
9491 11403 : const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
9492 :
9493 11403 : if (N < 2 || N < MinJumpTableEntries)
9494 : return;
9495 :
9496 : // TotalCases[i]: Total nbr of cases in Clusters[0..i].
9497 4083 : SmallVector<unsigned, 8> TotalCases(N);
9498 28344 : for (unsigned i = 0; i < N; ++i) {
9499 48522 : const APInt &Hi = Clusters[i].High->getValue();
9500 24261 : const APInt &Lo = Clusters[i].Low->getValue();
9501 48522 : TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
9502 24261 : if (i != 0)
9503 60534 : TotalCases[i] += TotalCases[i - 1];
9504 : }
9505 :
9506 : // Cheap case: the whole range may be suitable for jump table.
9507 4083 : uint64_t Range = getJumpTableRange(Clusters,0, N - 1);
9508 4083 : uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
9509 : assert(NumCases < UINT64_MAX / 100);
9510 : assert(Range >= NumCases);
9511 4083 : if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
9512 : CaseCluster JTCluster;
9513 3540 : if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
9514 3168 : Clusters[0] = JTCluster;
9515 3168 : Clusters.resize(1);
9516 3168 : return;
9517 : }
9518 : }
9519 :
9520 : // The algorithm below is not suitable for -O0.
9521 915 : if (TM.getOptLevel() == CodeGenOpt::None)
9522 : return;
9523 :
9524 : // Split Clusters into the minimum number of dense partitions. The algorithm uses
9525 : // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
9526 : // for the Case Statement'" (1994), but builds the MinPartitions array in
9527 : // reverse order to make it easier to reconstruct the partitions in ascending
9528 : // order. In the choice between two optimal partitionings, it picks the one
9529 : // which yields more jump tables.
9530 :
9531 : // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
9532 82 : SmallVector<unsigned, 8> MinPartitions(N);
9533 : // LastElement[i] is the last element of the partition starting at i.
9534 82 : SmallVector<unsigned, 8> LastElement(N);
9535 : // PartitionsScore[i] is used to break ties when choosing between two
9536 : // partitionings resulting in the same number of partitions.
9537 82 : SmallVector<unsigned, 8> PartitionsScore(N);
9538 : // For PartitionsScore, a small number of comparisons is considered as good as
9539 : // a jump table and a single comparison is considered better than a jump
9540 : // table.
9541 : enum PartitionScores : unsigned {
9542 : NoTable = 0,
9543 : Table = 1,
9544 : FewCases = 1,
9545 : SingleCase = 2
9546 : };
9547 :
9548 : // Base case: There is only one way to partition Clusters[N-1].
9549 164 : MinPartitions[N - 1] = 1;
9550 82 : LastElement[N - 1] = N - 1;
9551 82 : PartitionsScore[N - 1] = PartitionScores::SingleCase;
9552 :
9553 : // Note: loop indexes are signed to avoid underflow.
9554 590 : for (int64_t i = N - 2; i >= 0; i--) {
9555 : // Find optimal partitioning of Clusters[i..N-1].
9556 : // Baseline: Put Clusters[i] into a partition on its own.
9557 1524 : MinPartitions[i] = MinPartitions[i + 1] + 1;
9558 508 : LastElement[i] = i;
9559 1016 : PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
9560 :
9561 : // Search for a solution that results in fewer partitions.
9562 3049 : for (int64_t j = N - 1; j > i; j--) {
9563 : // Try building a partition from Clusters[i..j].
9564 2541 : uint64_t Range = getJumpTableRange(Clusters, i, j);
9565 2541 : uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j);
9566 : assert(NumCases < UINT64_MAX / 100);
9567 : assert(Range >= NumCases);
9568 2541 : if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
9569 837 : unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
9570 837 : unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
9571 837 : int64_t NumEntries = j - i + 1;
9572 :
9573 : if (NumEntries == 1)
9574 : Score += PartitionScores::SingleCase;
9575 837 : else if (NumEntries <= SmallNumberOfEntries)
9576 262 : Score += PartitionScores::FewCases;
9577 575 : else if (NumEntries >= MinJumpTableEntries)
9578 400 : Score += PartitionScores::Table;
9579 :
9580 : // If this leads to fewer partitions, or to the same number of
9581 : // partitions with a better score, it is a better partitioning.
9582 837 : if (NumPartitions < MinPartitions[i] ||
9583 94 : (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
9584 264 : MinPartitions[i] = NumPartitions;
9585 264 : LastElement[i] = j;
9586 264 : PartitionsScore[i] = Score;
9587 : }
9588 : }
9589 : }
9590 : }
9591 :
9592 : // Iterate over the partitions, replacing some with jump tables in-place.
9593 : unsigned DstIndex = 0;
9594 407 : for (unsigned First = 0, Last; First < N; First = Last + 1) {
9595 325 : Last = LastElement[First];
9596 : assert(Last >= First);
9597 : assert(DstIndex <= First);
9598 325 : unsigned NumClusters = Last - First + 1;
9599 :
9600 : CaseCluster JTCluster;
9601 366 : if (NumClusters >= MinJumpTableEntries &&
9602 41 : buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
9603 62 : Clusters[DstIndex++] = JTCluster;
9604 : } else {
9605 703 : for (unsigned I = First; I <= Last; ++I)
9606 1227 : std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
9607 : }
9608 : }
9609 82 : Clusters.resize(DstIndex);
9610 : }
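// Editorial sketch of the recurrence implemented above (after Kannan &
// Proebsting), in pseudocode over the same arrays:
//
//   MinPartitions[N-1] = 1;
//   for (int64_t i = N - 2; i >= 0; --i) {
//     MinPartitions[i] = MinPartitions[i + 1] + 1;   // Clusters[i] alone
//     for (int64_t j = N - 1; j > i; --j)
//       if (isSuitableForJumpTable(Clusters[i..j]))
//         MinPartitions[i] = std::min(
//             MinPartitions[i], 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]));
//   }
//
// PartitionsScore only breaks ties: among partitionings of equal size, the
// one scoring more jump tables (or cheap few-case partitions) is kept.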
9611 :
9612 1236 : bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
9613 : unsigned First, unsigned Last,
9614 : const SwitchInst *SI,
9615 : CaseCluster &BTCluster) {
9616 : assert(First <= Last);
9617 1236 : if (First == Last)
9618 : return false;
9619 :
9620 1280 : BitVector Dests(FuncInfo.MF->getNumBlockIDs());
9621 : unsigned NumCmps = 0;
9622 2156 : for (int64_t I = First; I <= Last; ++I) {
9623 : assert(Clusters[I].Kind == CC_Range);
9624 3032 : Dests.set(Clusters[I].MBB->getNumber());
9625 1567 : NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
9626 : }
9627 : unsigned NumDests = Dests.count();
9628 :
9629 1280 : APInt Low = Clusters[First].Low->getValue();
9630 1280 : APInt High = Clusters[Last].High->getValue();
9631 : assert(Low.slt(High));
9632 :
9633 640 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9634 640 : const DataLayout &DL = DAG.getDataLayout();
9635 640 : if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL))
9636 : return false;
9637 :
9638 : APInt LowBound;
9639 : APInt CmpRange;
9640 :
9641 49 : const int BitWidth = TLI.getPointerTy(DL).getSizeInBits();
9642 : assert(TLI.rangeFitsInWord(Low, High, DL) &&
9643 : "Case range must fit in bit mask!");
9644 :
9645 : // Check if the clusters cover a contiguous range such that no value in the
9646 : // range will jump to the default statement.
9647 49 : bool ContiguousRange = true;
9648 84 : for (int64_t I = First + 1; I <= Last; ++I) {
9649 395 : if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
9650 44 : ContiguousRange = false;
9651 44 : break;
9652 : }
9653 : }
9654 :
9655 49 : if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
9656 : // Optimize the case where all the case values fit in a word without having
9657 : // to subtract minValue; the subtraction can then be elided entirely.
9658 7 : LowBound = APInt::getNullValue(Low.getBitWidth());
9659 7 : CmpRange = High;
9660 7 : ContiguousRange = false;
9661 : } else {
9662 42 : LowBound = Low;
9663 42 : CmpRange = High - Low;
9664 : }
9665 :
9666 : CaseBitsVector CBV;
9667 49 : auto TotalProb = BranchProbability::getZero();
9668 221 : for (unsigned i = First; i <= Last; ++i) {
9669 : // Find the CaseBits for this destination.
9670 : unsigned j;
9671 393 : for (j = 0; j < CBV.size(); ++j)
9672 312 : if (CBV[j].BB == Clusters[i].MBB)
9673 : break;
9674 172 : if (j == CBV.size())
9675 : CBV.push_back(
9676 130 : CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
9677 172 : CaseBits *CB = &CBV[j];
9678 :
9679 : // Update Mask, Bits and ExtraProb.
9680 344 : uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
9681 344 : uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
9682 : assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
9683 172 : CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
9684 172 : CB->Bits += Hi - Lo + 1;
9685 172 : CB->ExtraProb += Clusters[i].Prob;
9686 : TotalProb += Clusters[i].Prob;
9687 : }
9688 :
9689 : BitTestInfo BTI;
9690 : llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
9691 : // Sort by probability first, number of bits second, bit mask third.
9692 0 : if (a.ExtraProb != b.ExtraProb)
9693 : return a.ExtraProb > b.ExtraProb;
9694 0 : if (a.Bits != b.Bits)
9695 0 : return a.Bits > b.Bits;
9696 0 : return a.Mask < b.Mask;
9697 : });
9698 :
9699 114 : for (auto &CB : CBV) {
9700 : MachineBasicBlock *BitTestBB =
9701 65 : FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
9702 130 : BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
9703 : }
9704 98 : BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
9705 49 : SI->getCondition(), -1U, MVT::Other, false,
9706 49 : ContiguousRange, nullptr, nullptr, std::move(BTI),
9707 : TotalProb);
9708 :
9709 98 : BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
9710 196 : BitTestCases.size() - 1, TotalProb);
9711 : return true;
9712 : }
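// Worked example (editorial): for "switch (x) { case 0: case 2: case 4:
// case 6: ... }" with all four cases branching to the same block BB_A, the
// loop above builds a single CaseBits entry:
//
//   CB->Mask == 0x55;   // bits 0, 2, 4, 6 set
//   CB->Bits == 4;
//
// The emitted test is then effectively "if ((1ULL << (x - LowBound)) & Mask)
// goto BB_A", guarded by the CmpRange check unless ContiguousRange holds.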
9713 :
9714 11419 : void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
9715 : const SwitchInst *SI) {
9716 : // Partition Clusters into as few subsets as possible, where each subset has a
9717 : // range that fits in a machine word and has <= 3 unique destinations.
9718 :
9719 : #ifndef NDEBUG
9720 : // Clusters must be sorted and contain Range or JumpTable clusters.
9721 : assert(!Clusters.empty());
9722 : assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
9723 : for (const CaseCluster &C : Clusters)
9724 : assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
9725 : for (unsigned i = 1; i < Clusters.size(); ++i)
9726 : assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
9727 : #endif
9728 :
9729 : // The algorithm below is not suitable for -O0.
9730 11419 : if (TM.getOptLevel() == CodeGenOpt::None)
9731 10455 : return;
9732 :
9733 : // If the target does not have a legal shift left, do not emit bit tests at all.
9734 967 : const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9735 967 : const DataLayout &DL = DAG.getDataLayout();
9736 :
9737 967 : EVT PTy = TLI.getPointerTy(DL);
9738 : if (!TLI.isOperationLegal(ISD::SHL, PTy))
9739 3 : return;
9740 :
9741 964 : int BitWidth = PTy.getSizeInBits();
9742 964 : const int64_t N = Clusters.size();
9743 :
9744 : // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
9745 964 : SmallVector<unsigned, 8> MinPartitions(N);
9746 : // LastElement[i] is the last element of the partition starting at i.
9747 964 : SmallVector<unsigned, 8> LastElement(N);
9748 :
9749 : // FIXME: This might not be the best algorithm for finding bit test clusters.
9750 :
9751 : // Base case: There is only one way to partition Clusters[N-1].
9752 964 : MinPartitions[N - 1] = 1;
9753 964 : LastElement[N - 1] = N - 1;
9754 :
9755 : // Note: loop indexes are signed to avoid underflow.
9756 2112 : for (int64_t i = N - 2; i >= 0; --i) {
9757 : // Find optimal partitioning of Clusters[i..N-1].
9758 : // Baseline: Put Clusters[i] into a partition on its own.
9759 3444 : MinPartitions[i] = MinPartitions[i + 1] + 1;
9760 1148 : LastElement[i] = i;
9761 :
9762 : // Search for a solution that results in fewer partitions.
9763 : // Note: the search is limited by BitWidth, reducing time complexity.
9764 3387 : for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
9765 : // Try building a partition from Clusters[i..j].
9766 :
9767 : // Check the range.
9768 4598 : if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(),
9769 4598 : Clusters[j].High->getValue(), DL))
9770 994 : continue;
9771 :
9772 : // Check nbr of destinations and cluster types.
9773 : // FIXME: This works, but doesn't seem very efficient.
9774 : bool RangesOnly = true;
9775 2610 : BitVector Dests(FuncInfo.MF->getNumBlockIDs());
9776 4612 : for (int64_t k = i; k <= j; k++) {
9777 6642 : if (Clusters[k].Kind != CC_Range) {
9778 : RangesOnly = false;
9779 : break;
9780 : }
9781 3307 : Dests.set(Clusters[k].MBB->getNumber());
9782 : }
9783 2596 : if (!RangesOnly || Dests.count() > 3)
9784 : break;
9785 :
9786 : // Check if it's a better partition.
9787 1245 : unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
9788 1245 : if (NumPartitions < MinPartitions[i]) {
9789 : // Found a better partition.
9790 876 : MinPartitions[i] = NumPartitions;
9791 876 : LastElement[i] = j;
9792 : }
9793 : }
9794 : }
9795 :
9796 : // Iterate over the partitions, replacing with bit-test clusters in-place.
9797 : unsigned DstIndex = 0;
9798 2200 : for (unsigned First = 0, Last; First < N; First = Last + 1) {
9799 2472 : Last = LastElement[First];
9800 : assert(First <= Last);
9801 : assert(DstIndex <= First);
9802 :
9803 : CaseCluster BitTestCluster;
9804 1236 : if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
9805 98 : Clusters[DstIndex++] = BitTestCluster;
9806 : } else {
9807 1187 : size_t NumClusters = Last - First + 1;
9808 2374 : std::memmove(&Clusters[DstIndex], &Clusters[First],
9809 : sizeof(Clusters[0]) * NumClusters);
9810 1187 : DstIndex += NumClusters;
9811 : }
9812 : }
9813 964 : Clusters.resize(DstIndex);
9814 : }
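// Editorial note: the inner search above is bounded by BitWidth because a
// bit-test partition must satisfy rangeFitsInWord, i.e. the span from
// Clusters[i].Low to Clusters[j].High has to fit in one machine word;
// together with the "at most 3 unique destinations" check this caps how far
// j can run from i and keeps the scan cheap in practice.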
9815 :
9816 11539 : void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
9817 : MachineBasicBlock *SwitchMBB,
9818 : MachineBasicBlock *DefaultMBB) {
9819 11539 : MachineFunction *CurMF = FuncInfo.MF;
9820 : MachineBasicBlock *NextMBB = nullptr;
9821 : MachineFunction::iterator BBI(W.MBB);
9822 11539 : if (++BBI != FuncInfo.MF->end())
9823 : NextMBB = &*BBI;
9824 :
9825 11539 : unsigned Size = W.LastCluster - W.FirstCluster + 1;
9826 :
9827 11539 : BranchProbabilityInfo *BPI = FuncInfo.BPI;
9828 :
9829 11539 : if (Size == 2 && W.MBB == SwitchMBB) {
9830 : // If any two of the cases has the same destination, and if one value
9831 : // is the same as the other, but has one bit unset that the other has set,
9832 : // use bit manipulation to do two compares at once. For example:
9833 : // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
9834 : // TODO: This could be extended to merge any 2 cases in switches with 3
9835 : // cases.
9836 : // TODO: Handle cases where W.CaseBB != SwitchBB.
9837 : CaseCluster &Small = *W.FirstCluster;
9838 : CaseCluster &Big = *W.LastCluster;
9839 :
9840 5530 : if (Small.Low == Small.High && Big.Low == Big.High &&
9841 3452 : Small.MBB == Big.MBB) {
9842 : const APInt &SmallValue = Small.Low->getValue();
9843 : const APInt &BigValue = Big.Low->getValue();
9844 :
9845 : // Check that there is only one bit different.
9846 374 : APInt CommonBit = BigValue ^ SmallValue;
9847 374 : if (CommonBit.isPowerOf2()) {
9848 25 : SDValue CondLHS = getValue(Cond);
9849 25 : EVT VT = CondLHS.getValueType();
9850 25 : SDLoc DL = getCurSDLoc();
9851 :
9852 25 : SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
9853 25 : DAG.getConstant(CommonBit, DL, VT));
9854 25 : SDValue Cond = DAG.getSetCC(
9855 25 : DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
9856 25 : ISD::SETEQ);
9857 :
9858 : // Update successor info.
9859 : // Both Small and Big will jump to Small.MBB, so we sum up the
9860 : // probabilities.
9861 25 : addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
9862 25 : if (BPI)
9863 25 : addSuccessorWithProb(
9864 : SwitchMBB, DefaultMBB,
9865 : // The default destination is the first successor in IR.
9866 : BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
9867 : else
9868 0 : addSuccessorWithProb(SwitchMBB, DefaultMBB);
9869 :
9870 : // Insert the true branch.
9871 : SDValue BrCond =
9872 25 : DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
9873 25 : DAG.getBasicBlock(Small.MBB));
9874 : // Insert the false branch.
9875 25 : BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
9876 25 : DAG.getBasicBlock(DefaultMBB));
9877 :
9878 25 : DAG.setRoot(BrCond);
9879 : return;
9880 : }
9881 : }
9882 : }
9883 :
9884 11514 : if (TM.getOptLevel() != CodeGenOpt::None) {
9885 : // Here, we order cases by probability so the most likely case will be
9886 : // checked first. However, two clusters can have the same probability in
9887 : // which case their relative ordering is non-deterministic. So we use Low
9888 : // as a tie-breaker as clusters are guaranteed to never overlap.
9889 : llvm::sort(W.FirstCluster, W.LastCluster + 1,
9890 : [](const CaseCluster &a, const CaseCluster &b) {
9891 0 : return a.Prob != b.Prob ?
9892 : a.Prob > b.Prob :
9893 0 : a.Low->getValue().slt(b.Low->getValue());
9894 : });
9895 :
9896 : // Rearrange the case blocks so that the last one falls through if possible
9897 : // without changing the order of probabilities.
9898 1562 : for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
9899 : --I;
9900 845 : if (I->Prob > W.LastCluster->Prob)
9901 : break;
9902 761 : if (I->Kind == CC_Range && I->MBB == NextMBB) {
9903 : std::swap(*I, *W.LastCluster);
9904 : break;
9905 : }
9906 : }
9907 : }
9908 :
9909 : // Compute total probability.
9910 11514 : BranchProbability DefaultProb = W.DefaultProb;
9911 : BranchProbability UnhandledProbs = DefaultProb;
9912 33602 : for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
9913 : UnhandledProbs += I->Prob;
9914 :
9915 : MachineBasicBlock *CurMBB = W.MBB;
9916 33602 : for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
9917 : MachineBasicBlock *Fallthrough;
9918 22088 : if (I == W.LastCluster) {
9919 : // For the last cluster, fall through to the default destination.
9920 : Fallthrough = DefaultMBB;
9921 : } else {
9922 10574 : Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
9923 : CurMF->insert(BBI, Fallthrough);
9924 : // Put Cond in a virtual register to make it available from the new blocks.
9925 10574 : ExportFromCurrentBlock(Cond);
9926 : }
9927 : UnhandledProbs -= I->Prob;
9928 :
9929 22088 : switch (I->Kind) {
9930 3199 : case CC_JumpTable: {
9931 : // FIXME: Optimize away range check based on pivot comparisons.
9932 3199 : JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
9933 3199 : JumpTable *JT = &JTCases[I->JTCasesIndex].second;
9934 :
9935 : // The jump block hasn't been inserted yet; insert it here.
9936 3199 : MachineBasicBlock *JumpMBB = JT->MBB;
9937 : CurMF->insert(BBI, JumpMBB);
9938 :
9939 3199 : auto JumpProb = I->Prob;
9940 : auto FallthroughProb = UnhandledProbs;
9941 :
9942 : // If the default statement is a target of the jump table, we evenly
9943 : // distribute the default probability to successors of CurMBB. Also
9944 : // update the probability on the edge from JumpMBB to Fallthrough.
9945 : for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
9946 : SE = JumpMBB->succ_end();
9947 14393 : SI != SE; ++SI) {
9948 12209 : if (*SI == DefaultMBB) {
9949 : JumpProb += DefaultProb / 2;
9950 : FallthroughProb -= DefaultProb / 2;
9951 1015 : JumpMBB->setSuccProbability(SI, DefaultProb / 2);
9952 : JumpMBB->normalizeSuccProbs();
9953 : break;
9954 : }
9955 : }
9956 :
9957 3199 : addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
9958 3199 : addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
9959 : CurMBB->normalizeSuccProbs();
9960 :
9961 : // The jump table header will be inserted in our current block; it will do
9962 : // the range check and fall through to our fallthrough block.
9963 3199 : JTH->HeaderBB = CurMBB;
9964 3199 : JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
9965 :
9966 : // If we're in the right place, emit the jump table header right now.
9967 3199 : if (CurMBB == SwitchMBB) {
9968 3186 : visitJumpTableHeader(*JT, *JTH, SwitchMBB);
9969 3186 : JTH->Emitted = true;
9970 : }
9971 : break;
9972 : }
9973 49 : case CC_BitTests: {
9974 : // FIXME: Optimize away range check based on pivot comparisons.
9975 49 : BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
9976 :
9977 : // The bit test blocks haven't been inserted yet; insert them here.
9978 114 : for (BitTestCase &BTC : BTB->Cases)
9979 65 : CurMF->insert(BBI, BTC.ThisBB);
9980 :
9981 : // Fill in fields of the BitTestBlock.
9982 49 : BTB->Parent = CurMBB;
9983 49 : BTB->Default = Fallthrough;
9984 :
9985 49 : BTB->DefaultProb = UnhandledProbs;
9986 : // If the cases in the bit test don't form a contiguous range, we evenly
9987 : // distribute the probability on the edge to Fallthrough to two
9988 : // successors of CurMBB.
9989 49 : if (!BTB->ContiguousRange) {
9990 : BTB->Prob += DefaultProb / 2;
9991 : BTB->DefaultProb -= DefaultProb / 2;
9992 : }
9993 :
9994 : // If we're in the right place, emit the bit test header right now.
9995 49 : if (CurMBB == SwitchMBB) {
9996 48 : visitBitTestHeader(*BTB, SwitchMBB);
9997 48 : BTB->Emitted = true;
9998 : }
9999 : break;
10000 : }
10001 18840 : case CC_Range: {
10002 : const Value *RHS, *LHS, *MHS;
10003 : ISD::CondCode CC;
10004 18840 : if (I->Low == I->High) {
10005 : // Check Cond == I->Low.
10006 : CC = ISD::SETEQ;
10007 : LHS = Cond;
10008 : RHS = I->Low;
10009 : MHS = nullptr;
10010 : } else {
10011 : // Check I->Low <= Cond <= I->High.
10012 : CC = ISD::SETLE;
10013 : LHS = I->Low;
10014 : MHS = Cond;
10015 : RHS = I->High;
10016 : }
10017 :
10018 : // The false probability is the sum of all unhandled cases.
10019 : CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
10020 37680 : getCurSDLoc(), I->Prob, UnhandledProbs);
10021 :
10022 18840 : if (CurMBB == SwitchMBB)
10023 8108 : visitSwitchCase(CB, SwitchMBB);
10024 : else
10025 10732 : SwitchCases.push_back(CB);
10026 :
10027 : break;
10028 : }
10029 : }
10030 : CurMBB = Fallthrough;
10031 : }
10032 : }
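// Worked example (editorial) for the two-case merge at the top of
// lowerWorkItem: with "if (X == 6 || X == 4)", SmallValue = 4 and
// BigValue = 6 differ in exactly one bit, CommonBit = 4 ^ 6 = 2, so the
// lowered check is
//
//   (X | 2) == (4 | 6)   i.e.   (X | 2) == 6
//
// folding both equality tests into one OR and one SETEQ node.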
10033 :
10034 26 : unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
10035 : CaseClusterIt First,
10036 : CaseClusterIt Last) {
10037 26 : return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
10038 88 : if (X.Prob != CC.Prob)
10039 : return X.Prob > CC.Prob;
10040 :
10041 : // Ties are broken by comparing the case value.
10042 104 : return X.Low->getValue().slt(CC.Low->getValue());
10043 26 : });
10044 : }
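// Editorial note: caseClusterRank counts how many clusters in [First, Last]
// sort ahead of CC under the probability-then-low-value order used when
// lowering; splitWorkItem uses it to verify that moving CC across the pivot
// would not demote it in that order.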
10045 :
10046 101 : void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
10047 : const SwitchWorkListItem &W,
10048 : Value *Cond,
10049 : MachineBasicBlock *SwitchMBB) {
10050 : assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
10051 : "Clusters not sorted?");
10052 :
10053 : assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
10054 :
10055 : // Balance the tree based on branch probabilities to create a near-optimal (in
10056 : // terms of search time given key frequency) binary search tree. See e.g. Kurt
10057 : // Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
10058 101 : CaseClusterIt LastLeft = W.FirstCluster;
10059 101 : CaseClusterIt FirstRight = W.LastCluster;
10060 : auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
10061 : auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
10062 :
10063 : // Move LastLeft and FirstRight towards each other from opposite directions to
10064 : // find a partitioning of the clusters which balances the probability on both
10065 : // sides. If LeftProb and RightProb are equal, alternate which side is
10066 : // taken to ensure 0-probability nodes are distributed evenly.
10067 : unsigned I = 0;
10068 480 : while (LastLeft + 1 < FirstRight) {
10069 379 : if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
10070 : LeftProb += (++LastLeft)->Prob;
10071 : else
10072 : RightProb += (--FirstRight)->Prob;
10073 379 : I++;
10074 : }
10075 :
10076 : while (true) {
10077 : // Our binary search tree differs from a typical BST in that ours can have up
10078 : // to three values in each leaf. The pivot selection above doesn't take that
10079 : // into account, which means the tree might require more nodes and be less
10080 : // efficient. We compensate for this here.
10081 :
10082 111 : unsigned NumLeft = LastLeft - W.FirstCluster + 1;
10083 111 : unsigned NumRight = W.LastCluster - FirstRight + 1;
10084 :
10085 189 : if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
10086 : // If one side has less than 3 clusters, and the other has more than 3,
10087 : // consider taking a cluster from the other side.
10088 :
10089 13 : if (NumLeft < NumRight) {
10090 : // Consider moving the first cluster on the right to the left side.
10091 : CaseCluster &CC = *FirstRight;
10092 4 : unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
10093 4 : unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
10094 4 : if (LeftSideRank <= RightSideRank) {
10095 : // Moving the cluster to the left does not demote it.
10096 : ++LastLeft;
10097 : ++FirstRight;
10098 2 : continue;
10099 : }
10100 : } else {
10101 : assert(NumRight < NumLeft);
10102 : // Consider moving the last element on the left to the right side.
10103 : CaseCluster &CC = *LastLeft;
10104 9 : unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
10105 9 : unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
10106 9 : if (RightSideRank <= LeftSideRank) {
10107 : // Moving the cluster to the right does not demote it.
10108 : --LastLeft;
10109 : --FirstRight;
10110 8 : continue;
10111 : }
10112 : }
10113 : }
10114 101 : break;
10115 10 : }
10116 :
10117 : assert(LastLeft + 1 == FirstRight);
10118 : assert(LastLeft >= W.FirstCluster);
10119 : assert(FirstRight <= W.LastCluster);
10120 :
10121 : // Use the first element on the right as pivot since we will make less-than
10122 : // comparisons against it.
10123 : CaseClusterIt PivotCluster = FirstRight;
10124 : assert(PivotCluster > W.FirstCluster);
10125 : assert(PivotCluster <= W.LastCluster);
10126 :
10127 101 : CaseClusterIt FirstLeft = W.FirstCluster;
10128 101 : CaseClusterIt LastRight = W.LastCluster;
10129 :
10130 101 : const ConstantInt *Pivot = PivotCluster->Low;
10131 :
10132 : // New blocks will be inserted immediately after the current one.
10133 101 : MachineFunction::iterator BBI(W.MBB);
10134 : ++BBI;
10135 :
10136 : // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
10137 : // we can branch to its destination directly if it's squeezed exactly in
10138 : // between the known lower bound and Pivot - 1.
10139 : MachineBasicBlock *LeftMBB;
10140 2 : if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
10141 102 : FirstLeft->Low == W.GE &&
10142 101 : (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
10143 0 : LeftMBB = FirstLeft->MBB;
10144 : } else {
10145 101 : LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
10146 101 : FuncInfo.MF->insert(BBI, LeftMBB);
10147 404 : WorkList.push_back(
10148 202 : {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
10149 : // Put Cond in a virtual register to make it available from the new blocks.
10150 101 : ExportFromCurrentBlock(Cond);
10151 : }
10152 :
10153 : // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
10154 : // single cluster, RHS.Low == Pivot, and we can branch to its destination
10155 : // directly if RHS.High equals the current upper bound.
10156 : MachineBasicBlock *RightMBB;
10157 1 : if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
10158 102 : W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
10159 0 : RightMBB = FirstRight->MBB;
10160 : } else {
10161 101 : RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
10162 101 : FuncInfo.MF->insert(BBI, RightMBB);
10163 404 : WorkList.push_back(
10164 202 : {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
10165 : // Put Cond in a virtual register to make it available from the new blocks.
10166 101 : ExportFromCurrentBlock(Cond);
10167 : }
10168 :
10169 : // Create the CaseBlock record that will be used to lower the branch.
10170 101 : CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
10171 101 : getCurSDLoc(), LeftProb, RightProb);
10172 :
10173 101 : if (W.MBB == SwitchMBB)
10174 52 : visitSwitchCase(CB, SwitchMBB);
10175 : else
10176 49 : SwitchCases.push_back(CB);
10177 101 : }
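// Editorial sketch: the balancing above walks LastLeft and FirstRight
// toward each other until the probability mass on each side (each seeded
// with half of W.DefaultProb) is roughly equal. The pivot is
// FirstRight->Low, and the function emits a single
//
//   CaseBlock(ISD::SETLT, Cond, Pivot, ..., LeftMBB, RightMBB, ...)
//
// re-queuing each half on WorkList with W.DefaultProb / 2.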
10178 :
10179 : // Scale CaseProb after peeling a case with the probability of PeeledCaseProb
10180 : // from the switch statement.
10181 63 : static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
10182 : BranchProbability PeeledCaseProb) {
10183 63 : if (PeeledCaseProb == BranchProbability::getOne())
10184 : return BranchProbability::getZero();
10185 63 : BranchProbability SwitchProb = PeeledCaseProb.getCompl();
10186 :
10187 63 : uint32_t Numerator = CaseProb.getNumerator();
10188 63 : uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
10189 63 : return BranchProbability(Numerator, std::max(Numerator, Denominator));
10190 : }
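// Numeric example (editorial): peeling a case with PeeledCaseProb = 3/4
// leaves SwitchProb = 1/4. A remaining cluster with CaseProb = 1/8 is
// rescaled to (1/8) / (1/4) = 1/2, its probability conditional on the
// peeled case not being taken; the std::max guard merely caps rounding so
// the result never exceeds certainty.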
10191 :
10192 : // Try to peel the top probability case if it exceeds the threshold.
10193 : // Return the current MachineBasicBlock for the switch statement if peeling
10194 : // does not occur.
10195 : // If the peeling is performed, return the newly created MachineBasicBlock
10196 : // for the peeled switch statement. Also update Clusters to remove the peeled
10197 : // case. PeeledCaseProb is the BranchProbability for the peeled case.
10198 11423 : MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
10199 : const SwitchInst &SI, CaseClusterVector &Clusters,
10200 : BranchProbability &PeeledCaseProb) {
10201 11423 : MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
10202 : // Don't peel if peeling is disabled, there is no profile info, there is only one cluster, we are at -O0, or we are optimizing for size.
10203 12319 : if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
10204 13273 : TM.getOptLevel() == CodeGenOpt::None ||
10205 925 : SwitchMBB->getParent()->getFunction().optForMinSize())
10206 10507 : return SwitchMBB;
10207 :
10208 916 : BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
10209 : unsigned PeeledCaseIndex = 0;
10210 : bool SwitchPeeled = false;
10211 5618 : for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
10212 : CaseCluster &CC = Clusters[Index];
10213 3786 : if (CC.Prob < TopCaseProb)
10214 : continue;
10215 19 : TopCaseProb = CC.Prob;
10216 : PeeledCaseIndex = Index;
10217 : SwitchPeeled = true;
10218 : }
10219 916 : if (!SwitchPeeled)
10220 : return SwitchMBB;
10221 :
10222 : LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
10223 : << TopCaseProb << "\n");
10224 :
10225 : // Record the MBB for the peeled switch statement.
10226 : MachineFunction::iterator BBI(SwitchMBB);
10227 : ++BBI;
10228 : MachineBasicBlock *PeeledSwitchMBB =
10229 19 : FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
10230 19 : FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
10231 :
10232 19 : ExportFromCurrentBlock(SI.getCondition());
10233 : auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
10234 19 : SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
10235 38 : nullptr, nullptr, TopCaseProb.getCompl()};
10236 19 : lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
10237 :
10238 : Clusters.erase(PeeledCaseIt);
10239 64 : for (CaseCluster &CC : Clusters) {
10240 : LLVM_DEBUG(
10241 : dbgs() << "Scale the probability for one cluster, before scaling: "
10242 : << CC.Prob << "\n");
10243 45 : CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
10244 : LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
10245 : }
10246 19 : PeeledCaseProb = TopCaseProb;
10247 19 : return PeeledSwitchMBB;
10248 : }
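// Editorial note: SwitchPeelThreshold is a percentage knob (a cl::opt in
// this file; its exact default is an assumption of this note, not stated
// here). A cluster is peeled only when its probability reaches
// Threshold/100, the scan keeps the most probable such cluster, and at most
// one cluster is peeled per switch statement.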
10249 :
10250 11423 : void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
10251 : // Extract cases from the switch.
10252 11423 : BranchProbabilityInfo *BPI = FuncInfo.BPI;
10253 : CaseClusterVector Clusters;
10254 11423 : Clusters.reserve(SI.getNumCases());
10255 58189 : for (auto I : SI.cases()) {
10256 46766 : MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
10257 46766 : const ConstantInt *CaseVal = I.getCaseValue();
10258 : BranchProbability Prob =
10259 4636 : BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
10260 46766 : : BranchProbability(1, SI.getNumCases() + 1);
10261 46766 : Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
10262 : }
10263 :
10264 11423 : MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
10265 :
10266 : // Cluster adjacent cases with the same destination. We do this at all
10267 : // optimization levels because it's cheap to do and will make codegen faster
10268 : // if there are many clusters.
10269 11423 : sortAndRangeify(Clusters);
10270 :
10271 11423 : if (TM.getOptLevel() != CodeGenOpt::None) {
10272 : // Replace an unreachable default with the most popular destination.
10273 : // FIXME: Exploit unreachable default more aggressively.
10274 : bool UnreachableDefault =
10275 : isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg());
10276 967 : if (UnreachableDefault && !Clusters.empty()) {
10277 : DenseMap<const BasicBlock *, unsigned> Popularity;
10278 : unsigned MaxPop = 0;
10279 40 : const BasicBlock *MaxBB = nullptr;
10280 473 : for (auto I : SI.cases()) {
10281 433 : const BasicBlock *BB = I.getCaseSuccessor();
10282 433 : if (++Popularity[BB] > MaxPop) {
10283 129 : MaxPop = Popularity[BB];
10284 129 : MaxBB = BB;
10285 : }
10286 : }
10287 : // Set new default.
10288 : assert(MaxPop > 0 && MaxBB);
10289 40 : DefaultMBB = FuncInfo.MBBMap[MaxBB];
10290 :
10291 : // Remove cases that were pointing to the destination that is now the
10292 : // default.
10293 : CaseClusterVector New;
10294 80 : New.reserve(Clusters.size());
10295 390 : for (CaseCluster &CC : Clusters) {
10296 350 : if (CC.MBB != DefaultMBB)
10297 292 : New.push_back(CC);
10298 : }
10299 : Clusters = std::move(New);
10300 : }
10301 : }
10302 :
10303 : // The branch probability of the peeled case.
10304 11423 : BranchProbability PeeledCaseProb = BranchProbability::getZero();
10305 : MachineBasicBlock *PeeledSwitchMBB =
10306 11423 : peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
10307 :
10308 : // If there is only the default destination, jump there directly.
10309 11423 : MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
10310 11423 : if (Clusters.empty()) {
10311 : assert(PeeledSwitchMBB == SwitchMBB);
10312 4 : SwitchMBB->addSuccessor(DefaultMBB);
10313 4 : if (DefaultMBB != NextBlock(SwitchMBB)) {
10314 4 : DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
10315 6 : getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
10316 : }
10317 : return;
10318 : }
10319 :
10320 11419 : findJumpTables(Clusters, &SI, DefaultMBB);
10321 11419 : findBitTestClusters(Clusters, &SI);
10322 :
10323 : LLVM_DEBUG({
10324 : dbgs() << "Case clusters: ";
10325 : for (const CaseCluster &C : Clusters) {
10326 : if (C.Kind == CC_JumpTable)
10327 : dbgs() << "JT:";
10328 : if (C.Kind == CC_BitTests)
10329 : dbgs() << "BT:";
10330 :
10331 : C.Low->getValue().print(dbgs(), true);
10332 : if (C.Low != C.High) {
10333 : dbgs() << '-';
10334 : C.High->getValue().print(dbgs(), true);
10335 : }
10336 : dbgs() << ' ';
10337 : }
10338 : dbgs() << '\n';
10339 : });
10340 :
10341 : assert(!Clusters.empty());
10342 : SwitchWorkList WorkList;
10343 : CaseClusterIt First = Clusters.begin();
10344 : CaseClusterIt Last = Clusters.end() - 1;
10345 11419 : auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
10346 : // Scale the branch probability for DefaultMBB if the peel occurs and
10347 : // DefaultMBB is not replaced.
10348 11419 : if (PeeledCaseProb != BranchProbability::getZero() &&
10349 11419 : DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
10350 18 : DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
10351 11419 : WorkList.push_back(
10352 : {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
10353 :
10354 23040 : while (!WorkList.empty()) {
10355 11621 : SwitchWorkListItem W = WorkList.back();
10356 : WorkList.pop_back();
10357 11621 : unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
10358 :
10359 11723 : if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
10360 102 : !DefaultMBB->getParent()->getFunction().optForMinSize()) {
10361 : // For optimized builds, lower a large range as a balanced binary tree.
10362 101 : splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
10363 101 : continue;
10364 : }
10365 :
10366 11520 : lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
10367 : }
10368 : }