//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//

#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <numeric>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "isel"

/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

static cl::opt<unsigned, true>
    LimitFPPrecision("limit-float-precision",
                     cl::desc("Generate low-precision inline sequences "
                              "for some float libcalls"),
                     cl::location(LimitFloatPrecision), cl::Hidden,
                     cl::init(0));

static cl::opt<unsigned> SwitchPeelThreshold(
    "switch-peel-threshold", cl::Hidden, cl::init(66),
    cl::desc("Set the case probability threshold for peeling the case from a "
             "switch statement. A value greater than 100 will void this "
             "optimization"));

// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;

// Return true if the Value passed requires ABI mangling, as it is a parameter
// to a function or a return value from a function which is not an intrinsic.
static bool isABIRegCopy(const Value *V) {
  const bool IsRetInst = V && isa<ReturnInst>(V);
  const bool IsCallInst = V && isa<CallInst>(V);
  const bool IsInLineAsm =
      IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
  const bool IsIndirectFunctionCall =
      IsCallInst && !IsInLineAsm &&
      !static_cast<const CallInst *>(V)->getCalledFunction();
  // It is possible that the call instruction is an inline asm statement or an
  // indirect function call in which case the return value of
  // getCalledFunction() would be nullptr.
  const bool IsIntrinsicCall =
      IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall &&
      static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
          Intrinsic::not_intrinsic;

  return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsIntrinsicCall));
}
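
// Illustrative example (editor's note, not in the original source): given IR
//
//   %r = call i32 @foo(i32 %x)              ; direct non-intrinsic call
//   %s = call i32 @llvm.ctpop.i32(i32 %x)   ; intrinsic call
//   ret i32 %r                              ; return instruction
//
// isABIRegCopy returns true for %r and for the ret, but false for %s, so only
// the former two use the calling-convention-specific register breakdown below.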

static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      bool IsABIRegCopy);

/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                const SDValue *Parts, unsigned NumParts,
                                MVT PartVT, EVT ValueVT, const Value *V,
                                Optional<ISD::NodeType> AssertOp = None,
                                bool IsABIRegCopy = false) {
  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
                                  PartVT, ValueVT, V, IsABIRegCopy);

  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part.
      unsigned RoundParts = NumParts & (NumParts - 1) ?
        1 << Log2_32(NumParts) : NumParts;
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      if (RoundParts > 2) {
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                              PartVT, HalfVT, V);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                              RoundParts / 2, PartVT, HalfVT, V);
      } else {
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL,
                              Parts + RoundParts, OddParts, PartVT, OddVT, V);

        // Combine the round and odd parts.
        Lo = Val;
        if (DAG.getDataLayout().isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi =
            DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                        DAG.getConstant(Lo.getValueSizeInBits(), DL,
                                        TLI.getPointerTy(DAG.getDataLayout())));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128)
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp)
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
    }
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  // PartEVT is the type of the register class that holds the value.
  // ValueVT is the type of the inline asm operation.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
      ValueVT.bitsLT(PartEVT)) {
    // For an FP value in an integer part, we need to truncate to the right
    // width first.
    PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
  }

  // Handle types that have the same size.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  // Handle types with different sizes.
  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp.hasValue())
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here.
    if (ValueVT.bitsLT(Val.getValueType()))
      return DAG.getNode(
          ISD::FP_ROUND, DL, ValueVT, Val,
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  llvm_unreachable("Unknown mismatch!");
}
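
// Worked example (illustrative, editor's note): assembling an i64 value from
// two i32 parts on a 32-bit little-endian target. NumParts = 2 is already a
// power of 2, so RoundParts = 2 and the assembly is simply
//   Val = BUILD_PAIR i64, Parts[0], Parts[1]
// (operands swapped on a big-endian layout); PartEVT then equals ValueVT and
// Val is returned unchanged.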

static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
                                              const Twine &ErrMsg) {
  const Instruction *I = dyn_cast_or_null<Instruction>(V);
  if (!V)
    return Ctx.emitError(ErrMsg);

  const char *AsmError = ", possible invalid constraint for vector type";
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (isa<InlineAsm>(CI->getCalledValue()))
      return Ctx.emitError(I, ErrMsg + AsmError);

  return Ctx.emitError(I, ErrMsg);
}

/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      bool IsABIRegCopy) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  // Handle a multi-element vector.
  if (NumParts > 1) {
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    unsigned NumRegs;

    if (IsABIRegCopy) {
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
          RegisterVT);
    } else {
      NumRegs =
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                     NumIntermediates, RegisterVT);
    }

    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    assert(RegisterVT.getSizeInBits() ==
           Parts[0].getSimpleValueType().getSizeInBits() &&
           "Part type sizes don't match!");

    // Assemble the parts into intermediate operands.
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    if (NumIntermediates == NumParts) {
      // If the register was not expanded, truncate or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
                                  PartVT, IntermediateVT, V);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, build the intermediate
      // operands from the parts.
      assert(NumParts % NumIntermediates == 0 &&
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                  PartVT, IntermediateVT, V);
    }

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
    EVT BuiltVectorTy =
        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
                         (IntermediateVT.isVector()
                              ? IntermediateVT.getVectorNumElements() * NumParts
                              : NumIntermediates));
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
                      DL, BuiltVectorTy, Ops);
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isVector()) {
    // If the element type of the source/dest vectors are the same, but the
    // parts vector has more elements than the value vector, then we have a
    // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
    // elements we want.
    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
             "Cannot narrow, it would be a lossy transformation");
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    // Vector/Vector bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
           "Cannot handle this kind of promotion");
    // Promoted vector extract
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
  }

  // Trivial bitcast if the types are the same size and the destination
  // vector type is legal.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  if (ValueVT.getVectorNumElements() != 1) {
    // Certain ABIs require that vectors are passed as integers. If the vectors
    // are the same size, this is an obvious bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
      // Bitcast Val back to the original type and extract the corresponding
      // vector we want.
      unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
      EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
                                          ValueVT.getVectorElementType(), Elts);
      Val = DAG.getBitcast(WiderVecType, Val);
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    diagnosePossiblyInvalidConstraint(
        *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
    return DAG.getUNDEF(ValueVT);
  }

  // Handle cases such as i8 -> <1 x i1>
  EVT ValueSVT = ValueVT.getVectorElementType();
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
    Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
                                    : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);

  return DAG.getBuildVector(ValueVT, DL, Val);
}
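
// Example of the widening path above (illustrative, editor's note): a
// <2 x float> value that was passed in a single v4f32 part is recovered with
//   EXTRACT_SUBVECTOR v2f32, (v4f32 Part), 0
// i.e. only the low two elements of the wider register are kept.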

static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V, bool IsABIRegCopy);

/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
                           const Value *V,
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
                           bool IsABIRegCopy = false) {
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
                                IsABIRegCopy);

  unsigned PartBits = PartVT.getSizeInBits();
  unsigned OrigNumParts = NumParts;
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
         "Copying to an illegal type!");

  if (NumParts == 0)
    return;

  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
  EVT PartEVT = PartVT;
  if (PartEVT == ValueVT) {
    assert(NumParts == 1 && "No-op copy with multiple parts!");
    Parts[0] = Val;
    return;
  }

  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    // If the parts cover more bits than the value has, promote the value.
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
      assert(NumParts == 1 && "Do not know what to promote to!");
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    } else {
      if (ValueVT.isFloatingPoint()) {
        // FP values need to be bitcast, then extended if they are being put
        // into a larger container.
        ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      }
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
             ValueVT.isInteger() &&
             "Unknown mismatch!");
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
      if (PartVT == MVT::x86mmx)
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }
  } else if (PartBits == ValueVT.getSizeInBits()) {
    // Different types of the same size.
    assert(NumParts == 1 && PartEVT != ValueVT);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover fewer bits than the value has, truncate the value.
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
           ValueVT.isInteger() &&
           "Unknown mismatch!");
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    if (PartVT == MVT::x86mmx)
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  }

  // The value may have changed - recompute ValueVT.
  ValueVT = Val.getValueType();
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
         "Failed to tile the value with PartVT!");

  if (NumParts == 1) {
    if (PartEVT != ValueVT) {
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                        "scalar-to-vector conversion failed");
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }

    Parts[0] = Val;
    return;
  }

  // Expand the value into multiple parts.
  if (NumParts & (NumParts - 1)) {
    // The number of parts is not a power of 2. Split off and copy the tail.
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
           "Do not know what to expand to!");
    unsigned RoundParts = 1 << Log2_32(NumParts);
    unsigned RoundBits = RoundParts * PartBits;
    unsigned OddParts = NumParts - RoundParts;
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
                                 DAG.getIntPtrConstant(RoundBits, DL));
    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);

    if (DAG.getDataLayout().isBigEndian())
      // The odd parts were reversed by getCopyToParts - unreverse them.
      std::reverse(Parts + RoundParts, Parts + NumParts);

    NumParts = RoundParts;
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  // The number of parts is a power of 2. Repeatedly bisect the value using
  // EXTRACT_ELEMENT.
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           ValueVT.getSizeInBits()),
                         Val);

  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    for (unsigned i = 0; i < NumParts; i += StepSize) {
      unsigned ThisBits = StepSize * PartBits / 2;
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
      SDValue &Part0 = Parts[i];
      SDValue &Part1 = Parts[i+StepSize/2];

      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));

      if (ThisBits == PartBits && ThisVT != PartVT) {
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
      }
    }
  }

  if (DAG.getDataLayout().isBigEndian())
    std::reverse(Parts, Parts + OrigNumParts);
}
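
// Worked example (illustrative, editor's note): copying an i96 value into
// three i32 parts. NumParts = 3 is not a power of 2, so RoundParts = 2:
// bits 64-95 are shifted down (SRL by 64) and stored as the odd part, the
// value is truncated to i64, and the power-of-2 loop above then bisects it
// with EXTRACT_ELEMENT into Parts[0] and Parts[1].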

/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 bool IsABIRegCopy) {
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (NumParts == 1) {
    EVT PartEVT = PartVT;
    if (PartEVT == ValueVT) {
      // Nothing to do.
    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
      // Bitconvert vector->vector case.
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
               PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
      EVT ElementVT = PartVT.getVectorElementType();
      // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
      // undef elements.
      SmallVector<SDValue, 16> Ops;
      for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
            DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));

      for (unsigned i = ValueVT.getVectorNumElements(),
           e = PartVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getUNDEF(ElementVT));

      Val = DAG.getBuildVector(PartVT, DL, Ops);

      // FIXME: Use CONCAT for 2x -> 4x.

      //SDValue UndefElts = DAG.getUNDEF(VectorTy);
      //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType().bitsGE(
                   ValueVT.getVectorElementType()) &&
               PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
      // Promoted vector extract
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
    } else {
      if (ValueVT.getVectorNumElements() == 1) {
        Val = DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else {
        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
               "lossy conversion of vector to scalar type");
        EVT IntermediateType =
            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getBitcast(IntermediateType, Val);
        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
    }

    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
    Parts[0] = Val;
    return;
  }

  // Handle a multi-element vector.
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs;
  if (IsABIRegCopy) {
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
        RegisterVT);
  } else {
    NumRegs =
        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                   NumIntermediates, RegisterVT);
  }
  unsigned NumElements = ValueVT.getVectorNumElements();

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

  // Convert the vector to the appropriate type if necessary.
  unsigned DestVectorNoElts =
      NumIntermediates *
      (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
  EVT BuiltVectorTy = EVT::getVectorVT(
      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
  if (Val.getValueType() != BuiltVectorTy)
    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);

  // Split the vector into intermediate operands.
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
    if (IntermediateVT.isVector())
      Ops[i] =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
                      DAG.getConstant(i * (NumElements / NumIntermediates), DL,
                                      TLI.getVectorIdxTy(DAG.getDataLayout())));
    else
      Ops[i] = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
          DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
  }

  // Split the intermediate operands into legal parts.
  if (NumParts == NumIntermediates) {
    // If the register was not expanded, promote or copy the value,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, split each value into
    // legal parts.
    assert(NumIntermediates != 0 && "division by zero");
    assert(NumParts % NumIntermediates == 0 &&
           "Must expand into a divisible number of parts!");
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
  }
}

RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
                           EVT valuevt, bool IsABIMangledValue)
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}

RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL, unsigned Reg, Type *Ty,
                           bool IsABIMangledValue) {
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);

  IsABIMangled = IsABIMangledValue;

  for (EVT ValueVT : ValueVTs) {
    unsigned NumRegs = IsABIMangledValue
                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
                           : TLI.getNumRegisters(Context, ValueVT);
    MVT RegisterVT = IsABIMangledValue
                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
                         : TLI.getRegisterType(Context, ValueVT);
    for (unsigned i = 0; i != NumRegs; ++i)
      Regs.push_back(Reg + i);
    RegVTs.push_back(RegisterVT);
    RegCount.push_back(NumRegs);
    Reg += NumRegs;
  }
}
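
// Example (illustrative, editor's note): on a 32-bit target where i64 is
// legalized as two i32 registers, constructing RegsForValue for an i64 value
// starting at virtual register %vreg5 yields ValueVTs = {i64},
// RegVTs = {i32}, RegCount = {2}, and Regs = {%vreg5, %vreg6}.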

SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                      FunctionLoweringInfo &FuncInfo,
                                      const SDLoc &dl, SDValue &Chain,
                                      SDValue *Flag, const Value *V) const {
  // A Value with type {} or [0 x %t] needs no registers.
  if (ValueVTs.empty())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Assemble the legal parts into the final values.
  SmallVector<SDValue, 4> Values(ValueVTs.size());
  SmallVector<SDValue, 8> Parts;
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
    unsigned NumRegs = RegCount[Value];
    MVT RegisterVT = IsABIMangled
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
                         : RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
      SDValue P;
      if (!Flag) {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
      } else {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
        *Flag = P.getValue(2);
      }

      Chain = P.getValue(1);
      Parts[i] = P;

      // If the source register was virtual and if we know something about it,
      // add an assert node.
      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
          !RegisterVT.isInteger() || RegisterVT.isVector())
        continue;

      const FunctionLoweringInfo::LiveOutInfo *LOI =
          FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
      if (!LOI)
        continue;

      unsigned RegSize = RegisterVT.getSizeInBits();
      unsigned NumSignBits = LOI->NumSignBits;
      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();

      if (NumZeroBits == RegSize) {
        // The current value is a zero.
        // Explicitly express that as it would be easier for
        // optimizations to kick in.
        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
        continue;
      }

      // FIXME: We capture more information than the dag can represent. For
      // now, just use the tightest assertzext/assertsext possible.
      bool isSExt = true;
      EVT FromVT(MVT::Other);
      if (NumSignBits == RegSize) {
        isSExt = true;   // ASSERT SEXT 1
        FromVT = MVT::i1;
      } else if (NumZeroBits >= RegSize - 1) {
        isSExt = false;  // ASSERT ZEXT 1
        FromVT = MVT::i1;
      } else if (NumSignBits > RegSize - 8) {
        isSExt = true;   // ASSERT SEXT 8
        FromVT = MVT::i8;
      } else if (NumZeroBits >= RegSize - 8) {
        isSExt = false;  // ASSERT ZEXT 8
        FromVT = MVT::i8;
      } else if (NumSignBits > RegSize - 16) {
        isSExt = true;   // ASSERT SEXT 16
        FromVT = MVT::i16;
      } else if (NumZeroBits >= RegSize - 16) {
        isSExt = false;  // ASSERT ZEXT 16
        FromVT = MVT::i16;
      } else if (NumSignBits > RegSize - 32) {
        isSExt = true;   // ASSERT SEXT 32
        FromVT = MVT::i32;
      } else if (NumZeroBits >= RegSize - 32) {
        isSExt = false;  // ASSERT ZEXT 32
        FromVT = MVT::i32;
      } else {
        continue;
      }
      // Add an assertion node.
      assert(FromVT != MVT::Other);
      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
                             RegisterVT, P, DAG.getValueType(FromVT));
    }

    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
                                     NumRegs, RegisterVT, ValueVT, V);
    Part += NumRegs;
    Parts.clear();
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
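
// Example (illustrative, editor's note): if live-out analysis recorded that a
// copied i32 virtual register has 24 known-zero leading bits, the loop above
// wraps the copy as
//   AssertZext i32, P, ValueType:i8
// so later combines can treat the register as a zero-extended i8.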

void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
                                 const Value *V,
                                 ISD::NodeType PreferredExtendType) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ISD::NodeType ExtendKind = PreferredExtendType;

  // Get the list of the value's legal parts.
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumParts = RegCount[Value];

    MVT RegisterVT = IsABIMangled
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
                         : RegVTs[Value];

    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
      ExtendKind = ISD::ZERO_EXTEND;

    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
                   &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
    Part += NumParts;
  }

  // Copy the parts into the registers.
  SmallVector<SDValue, 8> Chains(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    SDValue Part;
    if (!Flag) {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    } else {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
      *Flag = Part.getValue(1);
    }

    Chains[i] = Part.getValue(0);
  }

  if (NumRegs == 1 || Flag)
    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
    // flagged to it. That is the CopyToReg nodes and the user are considered
    // a single scheduling unit. If we create a TokenFactor and return it as
    // chain, then the TokenFactor is both a predecessor (operand) of the
    // user as well as a successor (the TF operands are flagged to the user).
    // c1, f1 = CopyToReg
    // c2, f2 = CopyToReg
    // c3 = TokenFactor c1, c2
    // ...
    //   = op c3, ..., f2
    Chain = Chains[NumRegs-1];
  else
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}

void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
                                        unsigned MatchingIdx, const SDLoc &dl,
                                        SelectionDAG &DAG,
                                        std::vector<SDValue> &Ops) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
  if (HasMatching)
    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
  else if (!Regs.empty() &&
           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
    // Put the register class of the virtual registers in the flag word. That
    // way, later passes can recompute register class constraints for inline
    // assembly as well as normal instructions.
    // Don't do this for tied operands that can use the regclass information
    // from the def.
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
  }

  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
  Ops.push_back(Res);

  if (Code == InlineAsm::Kind_Clobber) {
    // Clobbers should always have a 1:1 mapping with registers, and may
    // reference registers that have illegal (e.g. vector) types. Hence, we
    // shouldn't try to apply any sort of splitting logic to them.
    assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
           "No 1:1 mapping from clobbers to regs?");
    unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
    (void)SP;
    for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
      Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
      assert(
          (Regs[I] != SP ||
           DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
          "If we clobbered the stack pointer, MFI should know about it.");
    }
    return;
  }

  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
    MVT RegisterVT = RegVTs[Value];
    for (unsigned i = 0; i != NumRegs; ++i) {
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
      unsigned TheReg = Regs[Reg++];
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
    }
  }
}

SmallVector<std::pair<unsigned, unsigned>, 4>
RegsForValue::getRegsAndSizes() const {
  SmallVector<std::pair<unsigned, unsigned>, 4> OutVec;
  unsigned I = 0;
  for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
    unsigned RegCount = std::get<0>(CountAndVT);
    MVT RegisterVT = std::get<1>(CountAndVT);
    unsigned RegisterSize = RegisterVT.getSizeInBits();
    for (unsigned E = I + RegCount; I != E; ++I)
      OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
  }
  return OutVec;
}

void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
                               const TargetLibraryInfo *li) {
  AA = aa;
  GFI = gfi;
  LibInfo = li;
  DL = &DAG.getDataLayout();
  Context = DAG.getContext();
  LPadToCallSiteMap.clear();
}

void SelectionDAGBuilder::clear() {
  NodeMap.clear();
  UnusedArgNodeMap.clear();
  PendingLoads.clear();
  PendingExports.clear();
  CurInst = nullptr;
  HasTailCall = false;
  SDNodeOrder = LowestSDNodeOrder;
  StatepointLowering.clear();
}

void SelectionDAGBuilder::clearDanglingDebugInfo() {
  DanglingDebugInfoMap.clear();
}

SDValue SelectionDAGBuilder::getRoot() {
  if (PendingLoads.empty())
    return DAG.getRoot();

  if (PendingLoads.size() == 1) {
    SDValue Root = PendingLoads[0];
    DAG.setRoot(Root);
    PendingLoads.clear();
    return Root;
  }

  // Otherwise, we have to make a token factor node.
  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                             PendingLoads);
  PendingLoads.clear();
  DAG.setRoot(Root);
  return Root;
}
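
// Example (illustrative, editor's note): if two independent loads are
// pending, getRoot() returns TokenFactor(load0.chain, load1.chain), so a node
// that must follow both (e.g. a store) is ordered after them without
// serializing the loads against each other.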

SDValue SelectionDAGBuilder::getControlRoot() {
  SDValue Root = DAG.getRoot();

  if (PendingExports.empty())
    return Root;

  // Turn all of the CopyToReg chains into one factored node.
  if (Root.getOpcode() != ISD::EntryToken) {
    unsigned i = 0, e = PendingExports.size();
    for (; i != e; ++i) {
      assert(PendingExports[i].getNode()->getNumOperands() > 1);
      if (PendingExports[i].getNode()->getOperand(0) == Root)
        break;  // Don't add the root if we already indirectly depend on it.
    }

    if (i == e)
      PendingExports.push_back(Root);
  }

  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                     PendingExports);
  PendingExports.clear();
  DAG.setRoot(Root);
  return Root;
}

void SelectionDAGBuilder::visit(const Instruction &I) {
  // Set up outgoing PHI node register values before emitting the terminator.
  if (isa<TerminatorInst>(&I)) {
    HandlePHINodesInSuccessorBlocks(I.getParent());
  }

  // Increase the SDNodeOrder if dealing with a non-debug instruction.
  if (!isa<DbgInfoIntrinsic>(I))
    ++SDNodeOrder;

  CurInst = &I;

  visit(I.getOpcode(), I);

  if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
    // Propagate the fast-math-flags of this IR instruction to the DAG node
    // that maps to this instruction.
    // TODO: We could handle all flags (nsw, etc) here.
    // TODO: If an IR instruction maps to >1 node, only the final node will
    // have flags set.
    if (SDNode *Node = getNodeForIRValue(&I)) {
      SDNodeFlags IncomingFlags;
      IncomingFlags.copyFMF(*FPMO);
      if (!Node->getFlags().isDefined())
        Node->setFlags(IncomingFlags);
      else
        Node->intersectFlagsWith(IncomingFlags);
    }
  }

  if (!isa<TerminatorInst>(&I) && !HasTailCall &&
      !isStatepoint(&I)) // statepoints handle their exports internally
    CopyToExportRegsIfNeeded(&I);

  CurInst = nullptr;
}

void SelectionDAGBuilder::visitPHI(const PHINode &) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}

void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
  // Note: this doesn't use InstVisitor, because it has to work with
  // ConstantExpr's in addition to instructions.
  switch (Opcode) {
  default: llvm_unreachable("Unknown instruction type encountered!");
    // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
  case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
  }
}
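
// For reference (illustrative, editor's note): each HANDLE_INST expansion
// above produces a case of the form
//   case Instruction::Add: visitAdd((const BinaryOperator &)I); break;
// Because ConstantExpr shares opcodes with Instruction, the same dispatch
// works for constant expressions reached via getValueImpl.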

void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
                                                const DIExpression *Expr) {
  for (auto &DDIMI : DanglingDebugInfoMap)
    for (auto &DDI : DDIMI.second)
      if (DDI.getDI()) {
        const DbgValueInst *DI = DDI.getDI();
        DIVariable *DanglingVariable = DI->getVariable();
        DIExpression *DanglingExpr = DI->getExpression();
        if (DanglingVariable == Variable &&
            Expr->fragmentsOverlap(DanglingExpr)) {
          LLVM_DEBUG(dbgs()
                     << "Dropping dangling debug info for " << *DI << "\n");
          DDI = DanglingDebugInfo();
        }
      }
}

// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                                                   SDValue Val) {
  DanglingDebugInfoVector &DDIV = DanglingDebugInfoMap[V];
  for (auto &DDI : DDIV) {
    if (!DDI.getDI())
      continue;
    const DbgValueInst *DI = DDI.getDI();
    DebugLoc dl = DDI.getdl();
    unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
    DILocalVariable *Variable = DI->getVariable();
    DIExpression *Expr = DI->getExpression();
    assert(Variable->isValidLocationForIntrinsic(dl) &&
           "Expected inlined-at fields to agree");
    SDDbgValue *SDV;
    if (Val.getNode()) {
      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
        LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
                          << DbgSDNodeOrder << "] for:\n  " << *DI << "\n");
        LLVM_DEBUG(dbgs() << "  By mapping to:\n    "; Val.dump());
        // Increase the SDNodeOrder for the DbgValue here to make sure it is
        // inserted after the definition of Val when emitting the instructions
        // after ISel. An alternative could be to teach
        // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
        LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
                   << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
                   << ValSDNodeOrder << "\n");
        SDV = getDbgValue(Val, Variable, Expr, dl,
                          std::max(DbgSDNodeOrder, ValSDNodeOrder));
        DAG.AddDbgValue(SDV, Val.getNode(), false);
      } else
        LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
                          << " in EmitFuncArgumentDbgValue\n");
    } else
      LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
  }
  DanglingDebugInfoMap[V].clear();
}

/// getCopyFromRegs - If there was a virtual register allocated for the value
/// V, emit CopyFromReg of the specified type Ty. Otherwise return an empty
/// SDValue().
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
  SDValue Result;

  if (It != FuncInfo.ValueMap.end()) {
    unsigned InReg = It->second;

    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                     DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
    SDValue Chain = DAG.getEntryNode();
    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
                                 V);
    resolveDanglingDebugInfo(V, Result);
  }

  return Result;
}

/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
  // If we already have an SDValue for this value, use it. It's important
  // to do this first, so that we don't create a CopyFromReg if we already
  // have a regular SDValue.
  SDValue &N = NodeMap[V];
  if (N.getNode()) return N;

  // If there's a virtual register allocated and initialized for this
  // value, use it.
  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
    return copyFromReg;

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}

// Return true if an SDValue already exists for the given Value.
bool SelectionDAGBuilder::findValue(const Value *V) const {
  return (NodeMap.find(V) != NodeMap.end()) ||
         (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
}

/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
  // If we already have an SDValue for this value, use it.
  SDValue &N = NodeMap[V];
  if (N.getNode()) {
    if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
      // Remove the debug location from the node as the node is about to be
      // used in a location which may differ from the original debug location.
      // This is relevant to Constant and ConstantFP nodes because they can
      // appear as constant expressions inside PHI nodes.
      N->setDebugLoc(DebugLoc());
    }
    return N;
  }

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}

/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (const Constant *C = dyn_cast<Constant>(V)) {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
      return DAG.getConstant(*CI, getCurSDLoc(), VT);

    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
      return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);

    if (isa<ConstantPointerNull>(C)) {
      unsigned AS = V->getType()->getPointerAddressSpace();
      return DAG.getConstant(0, getCurSDLoc(),
                             TLI.getPointerTy(DAG.getDataLayout(), AS));
    }

    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
      return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);

    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
      return DAG.getUNDEF(VT);

    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      visit(CE->getOpcode(), *CE);
      SDValue N1 = NodeMap[V];
      assert(N1.getNode() && "visit didn't populate the NodeMap!");
      return N1;
    }

    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
      SmallVector<SDValue, 4> Constants;
      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
           OI != OE; ++OI) {
        SDNode *Val = getValue(*OI).getNode();
        // If the operand is an empty aggregate, there are no values.
        if (!Val) continue;
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Constants.push_back(SDValue(Val, i));
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const ConstantDataSequential *CDS =
            dyn_cast<ConstantDataSequential>(C)) {
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Ops.push_back(SDValue(Val, i));
      }

      if (isa<ArrayType>(CDS->getType()))
        return DAG.getMergeValues(Ops, getCurSDLoc());
      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
    }

    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
             "Unknown struct or array constant!");

      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
      unsigned NumElts = ValueVTs.size();
      if (NumElts == 0)
        return SDValue(); // empty struct
      SmallVector<SDValue, 4> Constants(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        EVT EltVT = ValueVTs[i];
        if (isa<UndefValue>(C))
          Constants[i] = DAG.getUNDEF(EltVT);
        else if (EltVT.isFloatingPoint())
          Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
        else
          Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
      return DAG.getBlockAddress(BA, VT);

    VectorType *VecTy = cast<VectorType>(V->getType());
    unsigned NumElements = VecTy->getNumElements();

    // Now that we know the number and type of the elements, get that number of
    // elements into the Ops array based on what kind of constant it is.
    SmallVector<SDValue, 16> Ops;
    if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
      for (unsigned i = 0; i != NumElements; ++i)
        Ops.push_back(getValue(CV->getOperand(i)));
    } else {
      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
      EVT EltVT =
          TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());

      SDValue Op;
      if (EltVT.isFloatingPoint())
        Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
      else
        Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
      Ops.assign(NumElements, Op);
    }

    // Create a BUILD_VECTOR node.
    return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
  }

  // If this is a static alloca, generate it as the frameindex instead of
  // computation.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end())
      return DAG.getFrameIndex(SI->second,
                               TLI.getFrameIndexTy(DAG.getDataLayout()));
  }

  // If this is an instruction which fast-isel has deferred, select it now.
  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);

    RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
                     Inst->getType(), isABIRegCopy(V));
    SDValue Chain = DAG.getEntryNode();
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
  }

  llvm_unreachable("Can't get register for value!");
}

void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
  if (!IsSEH)
    CatchPadMBB->setIsEHScopeEntry();
  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
  if (IsMSVCCXX || IsCoreCLR)
    CatchPadMBB->setIsEHFuncletEntry();
  // Wasm does not need catchpads anymore
  if (!IsWasmCXX)
    DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
                            getControlRoot()));
}

void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
  // Update machine-CFG edge.
  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
  FuncInfo.MBB->addSuccessor(TargetMBB);

  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  if (IsSEH) {
    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.
    if (TargetMBB != NextBlock(FuncInfo.MBB) ||
        TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(TargetMBB)));
    return;
  }

  // Figure out the funclet membership for the catchret's successor.
  // This will be used by the FuncletLayout pass to determine how to order the
  // BB's.
  // A 'catchret' returns to the outer scope's color.
  Value *ParentPad = I.getCatchSwitchParentPad();
  const BasicBlock *SuccessorColor;
  if (isa<ConstantTokenNone>(ParentPad))
    SuccessorColor = &FuncInfo.Fn->getEntryBlock();
  else
    SuccessorColor = cast<Instruction>(ParentPad)->getParent();
  assert(SuccessorColor && "No parent funclet for catchret!");
  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");

  // Create the terminator node.
  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
                            getControlRoot(), DAG.getBasicBlock(TargetMBB),
                            DAG.getBasicBlock(SuccessorColorMBB));
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
  // Don't emit any special code for the cleanuppad instruction. It just marks
  // the start of an EH scope/funclet.
  FuncInfo.MBB->setIsEHScopeEntry();
  FuncInfo.MBB->setIsEHFuncletEntry();
  FuncInfo.MBB->setIsCleanupFuncletEntry();
}

/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
static void findUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  EHPersonality Personality =
    classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
  bool IsSEH = isAsynchronousEHPersonality(Personality);

  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    BasicBlock *NewEHPadBB = nullptr;
    if (isa<LandingPadInst>(Pad)) {
      // Stop on landingpads. They are not funclets.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      break;
    } else if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
      // personalities.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      UnwindDests.back().first->setIsEHFuncletEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
        if (IsMSVCCXX || IsCoreCLR)
          UnwindDests.back().first->setIsEHFuncletEntry();
        if (!IsSEH)
          UnwindDests.back().first->setIsEHScopeEntry();
      }
      NewEHPadBB = CatchSwitch->getUnwindDest();
    } else {
      continue;
    }

    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    if (BPI && NewEHPadBB)
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
    EHPadBB = NewEHPadBB;
  }
}
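
// Example (illustrative, editor's note): for an invoke whose unwind edge
// reaches a catchswitch with two catchpad handlers that itself unwinds to a
// cleanuppad, the loop above first records both handler blocks at probability
// Prob (as funclet entries for MSVC++/CoreCLR), then follows the
// catchswitch's unwind edge, scales Prob by that edge's probability, and
// finally records the cleanup pad, stopping there.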

void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
  // Update successor info.
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  auto UnwindDest = I.getUnwindDest();
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  BranchProbability UnwindDestProb =
      (BPI && UnwindDest)
          ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
  }
  FuncInfo.MBB->normalizeSuccProbs();

  // Create the terminator node.
  SDValue Ret =
      DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
  report_fatal_error("visitCatchSwitch not yet implemented!");
}
1522 
1523 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1524  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1525  auto &DL = DAG.getDataLayout();
1526  SDValue Chain = getControlRoot();
1528  SmallVector<SDValue, 8> OutVals;
1529 
1530  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
1531  // lower
1532  //
1533  // %val = call <ty> @llvm.experimental.deoptimize()
1534  // ret <ty> %val
1535  //
1536  // differently.
1538  LowerDeoptimizingReturn();
1539  return;
1540  }
1541 
1542  if (!FuncInfo.CanLowerReturn) {
1543  unsigned DemoteReg = FuncInfo.DemoteRegister;
1544  const Function *F = I.getParent()->getParent();
1545 
1546  // Emit a store of the return value through the virtual register.
1547  // Leave Outs empty so that LowerReturn won't try to load return
1548  // registers the usual way.
1549  SmallVector<EVT, 1> PtrValueVTs;
1550  ComputeValueVTs(TLI, DL,
1553  PtrValueVTs);
1554 
1555  SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
1556  DemoteReg, PtrValueVTs[0]);
1557  SDValue RetOp = getValue(I.getOperand(0));
1558 
1559  SmallVector<EVT, 4> ValueVTs;
1560  SmallVector<uint64_t, 4> Offsets;
1561  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1562  unsigned NumValues = ValueVTs.size();
1563 
1564  SmallVector<SDValue, 4> Chains(NumValues);
1565  for (unsigned i = 0; i != NumValues; ++i) {
1566  // An aggregate return value cannot wrap around the address space, so
1567  // offsets to its parts don't wrap either.
1568  SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
1569  Chains[i] = DAG.getStore(
1570  Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1571  // FIXME: better loc info would be nice.
1572  Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
1573  }
1574 
1575  Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
1576  MVT::Other, Chains);
1577  } else if (I.getNumOperands() != 0) {
1578  SmallVector<EVT, 4> ValueVTs;
1579  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
1580  unsigned NumValues = ValueVTs.size();
1581  if (NumValues) {
1582  SDValue RetOp = getValue(I.getOperand(0));
1583 
1584  const Function *F = I.getParent()->getParent();
1585 
1586  ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1587  if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1588  Attribute::SExt))
1589  ExtendKind = ISD::SIGN_EXTEND;
1590  else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1591  Attribute::ZExt))
1592  ExtendKind = ISD::ZERO_EXTEND;
1593 
1594  LLVMContext &Context = F->getContext();
1595  bool RetInReg = F->getAttributes().hasAttribute(
1596  AttributeList::ReturnIndex, Attribute::InReg);
1597 
1598  for (unsigned j = 0; j != NumValues; ++j) {
1599  EVT VT = ValueVTs[j];
1600 
1601  if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
1602  VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
1603 
1604  unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
1605  MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
1606  SmallVector<SDValue, 4> Parts(NumParts);
1607  getCopyToParts(DAG, getCurSDLoc(),
1608  SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1609  &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
1610 
1611  // 'inreg' on function refers to return value
1612  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1613  if (RetInReg)
1614  Flags.setInReg();
1615 
1616  // Propagate extension type if any
1617  if (ExtendKind == ISD::SIGN_EXTEND)
1618  Flags.setSExt();
1619  else if (ExtendKind == ISD::ZERO_EXTEND)
1620  Flags.setZExt();
1621 
1622  for (unsigned i = 0; i < NumParts; ++i) {
1623  Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1624  VT, /*isfixed=*/true, 0, 0));
1625  OutVals.push_back(Parts[i]);
1626  }
1627  }
1628  }
1629  }
1630 
1631  // Push the swifterror virtual register as the last element of Outs. This
1632  // ensures the swifterror virtual register will be returned in the swifterror
1633  // physical register.
1634  const Function *F = I.getParent()->getParent();
1635  if (TLI.supportSwiftError() &&
1636  F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
1637  assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
1638  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1639  Flags.setSwiftError();
1640  Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
1641  EVT(TLI.getPointerTy(DL)) /*argvt*/,
1642  true /*isfixed*/, 1 /*origidx*/,
1643  0 /*partOffs*/));
1644  // Create SDNode for the swifterror virtual register.
1645  OutVals.push_back(
1646  DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
1647  &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
1648  EVT(TLI.getPointerTy(DL))));
1649  }
1650 
1651  bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
1652  CallingConv::ID CallConv =
1653  DAG.getMachineFunction().getFunction().getCallingConv();
1654  Chain = DAG.getTargetLoweringInfo().LowerReturn(
1655  Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
1656 
1657  // Verify that the target's LowerReturn behaved as expected.
1658  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1659  "LowerReturn didn't return a valid chain!");
1660 
1661  // Update the DAG with the new chain value resulting from return lowering.
1662  DAG.setRoot(Chain);
1663 }
1664 
1665 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
1666 /// created for it, emit nodes to copy the value into the virtual
1667 /// registers.
1668 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1669  // Skip empty types
1670  if (V->getType()->isEmptyTy())
1671  return;
1672 
1673  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1674  if (VMI != FuncInfo.ValueMap.end()) {
1675  assert(!V->use_empty() && "Unused value assigned virtual registers!");
1676  CopyValueToVirtualRegister(V, VMI->second);
1677  }
1678 }
1679 
1680 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
1681 /// the current basic block, add it to ValueMap now so that we'll get a
1682 /// CopyTo/FromReg.
1683 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1684  // No need to export constants.
1685  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1686 
1687  // Already exported?
1688  if (FuncInfo.isExportedInst(V)) return;
1689 
1690  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1691  CopyValueToVirtualRegister(V, Reg);
1692 }
1693 
1694 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1695  const BasicBlock *FromBB) {
1696  // The operands of the setcc have to be in this block. We don't know
1697  // how to export them from some other block.
1698  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1699  // Can export from current BB.
1700  if (VI->getParent() == FromBB)
1701  return true;
1702 
1703  // Is already exported, noop.
1704  return FuncInfo.isExportedInst(V);
1705  }
1706 
1707  // If this is an argument, we can export it if the BB is the entry block or
1708  // if it is already exported.
1709  if (isa<Argument>(V)) {
1710  if (FromBB == &FromBB->getParent()->getEntryBlock())
1711  return true;
1712 
1713  // Otherwise, can only export this if it is already exported.
1714  return FuncInfo.isExportedInst(V);
1715  }
1716 
1717  // Otherwise, constants can always be exported.
1718  return true;
1719 }
1720 
1721 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1722 BranchProbability
1723 SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1724  const MachineBasicBlock *Dst) const {
1725  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1726  const BasicBlock *SrcBB = Src->getBasicBlock();
1727  const BasicBlock *DstBB = Dst->getBasicBlock();
1728  if (!BPI) {
1729  // If BPI is not available, set the default probability as 1 / N, where N is
1730  // the number of successors.
1731  auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
1732  return BranchProbability(1, SuccSize);
1733  }
1734  return BPI->getEdgeProbability(SrcBB, DstBB);
1735 }
1736 
1737 void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
1738  MachineBasicBlock *Dst,
1739  BranchProbability Prob) {
1740  if (!FuncInfo.BPI)
1741  Src->addSuccessorWithoutProb(Dst);
1742  else {
1743  if (Prob.isUnknown())
1744  Prob = getEdgeProbability(Src, Dst);
1745  Src->addSuccessor(Dst, Prob);
1746  }
1747 }
1748 
1749 static bool InBlock(const Value *V, const BasicBlock *BB) {
1750  if (const Instruction *I = dyn_cast<Instruction>(V))
1751  return I->getParent() == BB;
1752  return true;
1753 }
1754 
1755 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1756 /// This function emits a branch and is used at the leaves of an OR or an
1757 /// AND operator tree.
1758 void
1759 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1760  MachineBasicBlock *TBB,
1761  MachineBasicBlock *FBB,
1762  MachineBasicBlock *CurBB,
1763  MachineBasicBlock *SwitchBB,
1764  BranchProbability TProb,
1765  BranchProbability FProb,
1766  bool InvertCond) {
1767  const BasicBlock *BB = CurBB->getBasicBlock();
1768 
1769  // If the leaf of the tree is a comparison, merge the condition into
1770  // the caseblock.
1771  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1772  // The operands of the cmp have to be in this block. We don't know
1773  // how to export them from some other block. If this is the first block
1774  // of the sequence, no exporting is needed.
1775  if (CurBB == SwitchBB ||
1776  (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1777  isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1778  ISD::CondCode Condition;
1779  if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1780  ICmpInst::Predicate Pred =
1781  InvertCond ? IC->getInversePredicate() : IC->getPredicate();
1782  Condition = getICmpCondCode(Pred);
1783  } else {
1784  const FCmpInst *FC = cast<FCmpInst>(Cond);
1785  FCmpInst::Predicate Pred =
1786  InvertCond ? FC->getInversePredicate() : FC->getPredicate();
1787  Condition = getFCmpCondCode(Pred);
1788  if (TM.Options.NoNaNsFPMath)
1789  Condition = getFCmpCodeWithoutNaN(Condition);
1790  }
1791 
1792  CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1793  TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1794  SwitchCases.push_back(CB);
1795  return;
1796  }
1797  }
1798 
1799  // Create a CaseBlock record representing this branch.
1800  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
1801  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
1802  nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1803  SwitchCases.push_back(CB);
1804 }
1805 
1806 /// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y), lower it as a chain of conditional branches instead of materializing the boolean value.
1807 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1808  MachineBasicBlock *TBB,
1809  MachineBasicBlock *FBB,
1810  MachineBasicBlock *CurBB,
1811  MachineBasicBlock *SwitchBB,
1812  Instruction::BinaryOps Opc,
1813  BranchProbability TProb,
1814  BranchProbability FProb,
1815  bool InvertCond) {
1816  // Skip over a 'not' that is part of the tree; remember to invert the opcode
1817  // and operands at the next level.
1818  if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
1819  const Value *CondOp = BinaryOperator::getNotArgument(Cond);
1820  if (InBlock(CondOp, CurBB->getBasicBlock())) {
1821  FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
1822  !InvertCond);
1823  return;
1824  }
1825  }
1826 
1827  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1828  // Compute the effective opcode for Cond, taking into account whether it needs
1829  // to be inverted, e.g.
1830  // and (not (or A, B)), C
1831  // gets lowered as
1832  // and (and (not A, not B), C)
1833  unsigned BOpc = 0;
1834  if (BOp) {
1835  BOpc = BOp->getOpcode();
1836  if (InvertCond) {
1837  if (BOpc == Instruction::And)
1838  BOpc = Instruction::Or;
1839  else if (BOpc == Instruction::Or)
1840  BOpc = Instruction::And;
1841  }
1842  }
1843 
1844  // If this node is not part of the or/and tree, emit it as a branch.
1845  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1846  BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
1847  BOp->getParent() != CurBB->getBasicBlock() ||
1848  !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1849  !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1850  EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
1851  TProb, FProb, InvertCond);
1852  return;
1853  }
1854 
1855  // Create TmpBB after CurBB.
1856  MachineFunction::iterator BBI(CurBB);
1857  MachineFunction &MF = DAG.getMachineFunction();
1858  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1859  CurBB->getParent()->insert(++BBI, TmpBB);
1860 
1861  if (Opc == Instruction::Or) {
1862  // Codegen X | Y as:
1863  // BB1:
1864  // jmp_if_X TBB
1865  // jmp TmpBB
1866  // TmpBB:
1867  // jmp_if_Y TBB
1868  // jmp FBB
1869  //
1870 
1871  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1872  // The requirement is that
1873  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1874  // = TrueProb for original BB.
1875  // Assuming the original probabilities are A and B, one choice is to set
1876  // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
1877  // A/(1+B) and 2B/(1+B). This choice assumes that
1878  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1879  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
1880  // TmpBB, but the math is more complicated.
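  // A worked example (editorial addition, values assumed): let A = 60% and
  // B = 40%. BB1 then gets {A/2, A/2 + B} = {30%, 70%}, and TmpBB's pair
  // {A/2, B} normalizes to {A/(1+B), 2B/(1+B)} = {3/7, 4/7}. The invariant
  // checks out: 30% + 70% * 3/7 = 60% = TrueProb for the original BB.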
1881 
1882  auto NewTrueProb = TProb / 2;
1883  auto NewFalseProb = TProb / 2 + FProb;
1884  // Emit the LHS condition.
1885  FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
1886  NewTrueProb, NewFalseProb, InvertCond);
1887 
1888  // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
1889  SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
1890  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1891  // Emit the RHS condition into TmpBB.
1892  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1893  Probs[0], Probs[1], InvertCond);
1894  } else {
1895  assert(Opc == Instruction::And && "Unknown merge op!");
1896  // Codegen X & Y as:
1897  // BB1:
1898  // jmp_if_X TmpBB
1899  // jmp FBB
1900  // TmpBB:
1901  // jmp_if_Y TBB
1902  // jmp FBB
1903  //
1904  // This requires creation of TmpBB after CurBB.
1905 
1906  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1907  // The requirement is that
1908  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1909  // = FalseProb for original BB.
1910  // Assuming the original probabilities are A and B, one choice is to set
1911  // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
1912  // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
1913  // TrueProb for BB1 * FalseProb for TmpBB.
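  // A worked example (editorial addition, values assumed): let A = 60% and
  // B = 40%. BB1 then gets {A + B/2, B/2} = {80%, 20%}, and TmpBB's pair
  // {A, B/2} normalizes to {2A/(1+A), B/(1+A)} = {75%, 25%}. The invariant
  // checks out: 20% + 80% * 25% = 40% = FalseProb for the original BB.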
1914 
1915  auto NewTrueProb = TProb + FProb / 2;
1916  auto NewFalseProb = FProb / 2;
1917  // Emit the LHS condition.
1918  FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
1919  NewTrueProb, NewFalseProb, InvertCond);
1920 
1921  // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
1922  SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
1923  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1924  // Emit the RHS condition into TmpBB.
1925  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1926  Probs[0], Probs[1], InvertCond);
1927  }
1928 }
1929 
1930 /// If the set of cases should be emitted as a series of branches, return true.
1931 /// If we should emit this as a bunch of and/or'd together conditions, return
1932 /// false.
1933 bool
1934 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
1935  if (Cases.size() != 2) return true;
1936 
1937  // If this is two comparisons of the same values or'd or and'd together, they
1938  // will get folded into a single comparison, so don't emit two blocks.
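  // Editorial example: lowering (X < Y) || (X > Y) produces two case blocks
  // over the same operands; emitted as setcc nodes the DAG can fold them into
  // a single comparison, so separate branch blocks would only add overhead.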
1939  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1940  Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1941  (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1942  Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1943  return false;
1944  }
1945 
1946  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1947  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1948  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1949  Cases[0].CC == Cases[1].CC &&
1950  isa<Constant>(Cases[0].CmpRHS) &&
1951  cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1952  if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1953  return false;
1954  if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1955  return false;
1956  }
1957 
1958  return true;
1959 }
1960 
1961 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1962  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1963 
1964  // Update machine-CFG edges.
1965  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1966 
1967  if (I.isUnconditional()) {
1968  // Update machine-CFG edges.
1969  BrMBB->addSuccessor(Succ0MBB);
1970 
1971  // If this is not a fall-through branch or optimizations are switched off,
1972  // emit the branch.
1973  if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
1974  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
1975  MVT::Other, getControlRoot(),
1976  DAG.getBasicBlock(Succ0MBB)));
1977 
1978  return;
1979  }
1980 
1981  // If this condition is one of the special cases we handle, do special stuff
1982  // now.
1983  const Value *CondVal = I.getCondition();
1984  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1985 
1986  // If this is a series of conditions that are or'd or and'd together, emit
1987  // this as a sequence of branches instead of setcc's with and/or operations.
1988  // As long as jumps are not expensive, this should improve performance.
1989  // For example, instead of something like:
1990  // cmp A, B
1991  // C = seteq
1992  // cmp D, E
1993  // F = setle
1994  // or C, F
1995  // jnz foo
1996  // Emit:
1997  // cmp A, B
1998  // je foo
1999  // cmp D, E
2000  // jle foo
2001  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
2002  Instruction::BinaryOps Opcode = BOp->getOpcode();
2003  if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
2004  !I.getMetadata(LLVMContext::MD_unpredictable) &&
2005  (Opcode == Instruction::And || Opcode == Instruction::Or)) {
2006  FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
2007  Opcode,
2008  getEdgeProbability(BrMBB, Succ0MBB),
2009  getEdgeProbability(BrMBB, Succ1MBB),
2010  /*InvertCond=*/false);
2011  // If the compares in later blocks need to use values not currently
2012  // exported from this block, export them now. This block should always
2013  // be the first entry.
2014  assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
2015 
2016  // Allow some cases to be rejected.
2017  if (ShouldEmitAsBranches(SwitchCases)) {
2018  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
2019  ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
2020  ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
2021  }
2022 
2023  // Emit the branch for this block.
2024  visitSwitchCase(SwitchCases[0], BrMBB);
2025  SwitchCases.erase(SwitchCases.begin());
2026  return;
2027  }
2028 
2029  // Okay, we decided not to do this, remove any inserted MBB's and clear
2030  // SwitchCases.
2031  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
2032  FuncInfo.MF->erase(SwitchCases[i].ThisBB);
2033 
2034  SwitchCases.clear();
2035  }
2036  }
2037 
2038  // Create a CaseBlock record representing this branch.
2039  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
2040  nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
2041 
2042  // Use visitSwitchCase to actually insert the fast branch sequence for this
2043  // cond branch.
2044  visitSwitchCase(CB, BrMBB);
2045 }
2046 
2047 /// visitSwitchCase - Emits the necessary code to represent a single node in
2048 /// the binary search tree resulting from lowering a switch instruction.
2049 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
2050  MachineBasicBlock *SwitchBB) {
2051  SDValue Cond;
2052  SDValue CondLHS = getValue(CB.CmpLHS);
2053  SDLoc dl = CB.DL;
2054 
2055  // Build the setcc now.
2056  if (!CB.CmpMHS) {
2057  // Fold "(X == true)" to X and "(X == false)" to !X to
2058  // handle common cases produced by branch lowering.
2059  if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
2060  CB.CC == ISD::SETEQ)
2061  Cond = CondLHS;
2062  else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
2063  CB.CC == ISD::SETEQ) {
2064  SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
2065  Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
2066  } else
2067  Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
2068  } else {
2069  assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
2070 
2071  const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
2072  const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
2073 
2074  SDValue CmpOp = getValue(CB.CmpMHS);
2075  EVT VT = CmpOp.getValueType();
2076 
2077  if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
2078  Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
2079  ISD::SETLE);
2080  } else {
2081  SDValue SUB = DAG.getNode(ISD::SUB, dl,
2082  VT, CmpOp, DAG.getConstant(Low, dl, VT));
2083  Cond = DAG.getSetCC(dl, MVT::i1, SUB,
2084  DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
2085  }
2086  }
2087 
2088  // Update successor info
2089  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2090  // TrueBB and FalseBB are always different unless the incoming IR is
2091  // degenerate. This only happens when running llc on weird IR.
2092  if (CB.TrueBB != CB.FalseBB)
2093  addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
2094  SwitchBB->normalizeSuccProbs();
2095 
2096  // If the lhs block is the next block, invert the condition so that we can
2097  // fall through to the lhs instead of the rhs block.
2098  if (CB.TrueBB == NextBlock(SwitchBB)) {
2099  std::swap(CB.TrueBB, CB.FalseBB);
2100  SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
2101  Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
2102  }
2103 
2104  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2105  MVT::Other, getControlRoot(), Cond,
2106  DAG.getBasicBlock(CB.TrueBB));
2107 
2108  // Insert the false branch. Do this even if it's a fall through branch,
2109  // this makes it easier to do DAG optimizations which require inverting
2110  // the branch condition.
2111  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2112  DAG.getBasicBlock(CB.FalseBB));
2113 
2114  DAG.setRoot(BrCond);
2115 }
2116 
2117 /// visitJumpTable - Emit JumpTable node in the current MBB
2118 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
2119  // Emit the code for the jump table
2120  assert(JT.Reg != -1U && "Should lower JT Header first!");
2121  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2122  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2123  JT.Reg, PTy);
2124  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
2125  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2126  MVT::Other, Index.getValue(1),
2127  Table, Index);
2128  DAG.setRoot(BrJumpTable);
2129 }
2130 
2131 /// visitJumpTableHeader - This function emits the code needed to compute the
2132 /// index into the jump table from the switch case value.
2133 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
2134  JumpTableHeader &JTH,
2135  MachineBasicBlock *SwitchBB) {
2136  SDLoc dl = getCurSDLoc();
2137 
2138  // Subtract the lowest switch case value from the value being switched on and
2139  // conditionally branch to the default MBB if the result is greater than the
2140  // difference between smallest and largest cases.
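  // Editorial illustration (hypothetical case range): for cases 10..20 this
  // emits Sub = SValue - 10 and branches to the default block when Sub > 10
  // (unsigned); otherwise Sub becomes the index used in visitJumpTable.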
2141  SDValue SwitchOp = getValue(JTH.SValue);
2142  EVT VT = SwitchOp.getValueType();
2143  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2144  DAG.getConstant(JTH.First, dl, VT));
2145 
2146  // The SDNode we just created, which holds the value being switched on minus
2147  // the smallest case value, needs to be copied to a virtual register so it
2148  // can be used as an index into the jump table in a subsequent basic block.
2149  // This value may be smaller or larger than the target's pointer type, and
2150  // may therefore require extension or truncation.
2151  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2152  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2153 
2154  unsigned JumpTableReg =
2155  FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2156  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2157  JumpTableReg, SwitchOp);
2158  JT.Reg = JumpTableReg;
2159 
2160  // Emit the range check for the jump table, and branch to the default block
2161  // for the switch statement if the value being switched on exceeds the largest
2162  // case in the switch.
2163  SDValue CMP = DAG.getSetCC(
2164  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2165  Sub.getValueType()),
2166  Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
2167 
2168  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2169  MVT::Other, CopyTo, CMP,
2170  DAG.getBasicBlock(JT.Default));
2171 
2172  // Avoid emitting unnecessary branches to the next block.
2173  if (JT.MBB != NextBlock(SwitchBB))
2174  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2175  DAG.getBasicBlock(JT.MBB));
2176 
2177  DAG.setRoot(BrCond);
2178 }
2179 
2180 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2181 /// variable if there exists one.
2182 static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2183  SDValue &Chain) {
2184  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2185  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2186  MachineFunction &MF = DAG.getMachineFunction();
2187  Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
2188  MachineSDNode *Node =
2189  DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2190  if (Global) {
2191  MachinePointerInfo MPInfo(Global);
2192  MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
2193  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2194  MachineMemOperand::MODereferenceable;
2195  *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
2196  DAG.getEVTAlignment(PtrTy));
2197  Node->setMemRefs(MemRefs, MemRefs + 1);
2198  }
2199  return SDValue(Node, 0);
2200 }
2201 
2202 /// Codegen a new tail for a stack protector check ParentMBB which has had its
2203 /// tail spliced into a stack protector check success bb.
2204 ///
2205 /// For a high level explanation of how this fits into the stack protector
2206 /// generation see the comment on the declaration of class
2207 /// StackProtectorDescriptor.
2208 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2209  MachineBasicBlock *ParentBB) {
2210 
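  // Editorial sketch of the no-guard-check-function path below: load the
  // value in the guard slot, load the reference guard value, compare the two
  // via SUB + SETNE, BRCOND into SPD.getFailureMBB() on mismatch, and
  // otherwise BR into SPD.getSuccessMBB().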
2211  // First create the loads to the guard/stack slot for the comparison.
2212  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2213  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2214 
2215  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2216  int FI = MFI.getStackProtectorIndex();
2217 
2218  SDValue Guard;
2219  SDLoc dl = getCurSDLoc();
2220  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2221  const Module &M = *ParentBB->getParent()->getFunction().getParent();
2222  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
2223 
2224  // Generate code to load the content of the guard slot.
2225  SDValue GuardVal = DAG.getLoad(
2226  PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
2227  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
2228  MachineMemOperand::MOVolatile);
2229 
2230  if (TLI.useStackGuardXorFP())
2231  GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
2232 
2233  // Retrieve guard check function, nullptr if instrumentation is inlined.
2234  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
2235  // The target provides a guard check function to validate the guard value.
2236  // Generate a call to that function with the content of the guard slot as
2237  // argument.
2238  auto *Fn = cast<Function>(GuardCheck);
2239  FunctionType *FnTy = Fn->getFunctionType();
2240  assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2241 
2242  TargetLowering::ArgListTy Args;
2243  TargetLowering::ArgListEntry Entry;
2244  Entry.Node = GuardVal;
2245  Entry.Ty = FnTy->getParamType(0);
2246  if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
2247  Entry.IsInReg = true;
2248  Args.push_back(Entry);
2249 
2250  TargetLowering::CallLoweringInfo CLI(DAG);
2251  CLI.setDebugLoc(getCurSDLoc())
2252  .setChain(DAG.getEntryNode())
2253  .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
2254  getValue(GuardCheck), std::move(Args));
2255 
2256  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2257  DAG.setRoot(Result.second);
2258  return;
2259  }
2260 
2261  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2262  // Otherwise, emit a volatile load to retrieve the stack guard value.
2263  SDValue Chain = DAG.getEntryNode();
2264  if (TLI.useLoadStackGuardNode()) {
2265  Guard = getLoadStackGuard(DAG, dl, Chain);
2266  } else {
2267  const Value *IRGuard = TLI.getSDagStackGuard(M);
2268  SDValue GuardPtr = getValue(IRGuard);
2269 
2270  Guard =
2271  DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
2272  Align, MachineMemOperand::MOVolatile);
2273  }
2274 
2275  // Perform the comparison via a subtract + setcc.
2276  EVT VT = Guard.getValueType();
2277  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
2278 
2279  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2280  *DAG.getContext(),
2281  Sub.getValueType()),
2282  Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
2283 
2284  // If the sub is not 0, the guard and stack-slot values are not equal, so
2285  // branch to the failure MBB.
2286  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2287  MVT::Other, GuardVal.getOperand(0),
2288  Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2289  // Otherwise branch to success MBB.
2290  SDValue Br = DAG.getNode(ISD::BR, dl,
2291  MVT::Other, BrCond,
2292  DAG.getBasicBlock(SPD.getSuccessMBB()));
2293 
2294  DAG.setRoot(Br);
2295 }
2296 
2297 /// Codegen the failure basic block for a stack protector check.
2298 ///
2299 /// A failure stack protector machine basic block consists simply of a call to
2300 /// __stack_chk_fail().
2301 ///
2302 /// For a high level explanation of how this fits into the stack protector
2303 /// generation see the comment on the declaration of class
2304 /// StackProtectorDescriptor.
2305 void
2306 SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2307  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2308  SDValue Chain =
2309  TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2310  None, false, getCurSDLoc(), false, false).second;
2311  DAG.setRoot(Chain);
2312 }
2313 
2314 /// visitBitTestHeader - This function emits the code needed to produce a
2315 /// value suitable for "bit tests".
2316 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
2317  MachineBasicBlock *SwitchBB) {
2318  SDLoc dl = getCurSDLoc();
2319 
2320  // Subtract the minimum value
2321  SDValue SwitchOp = getValue(B.SValue);
2322  EVT VT = SwitchOp.getValueType();
2323  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2324  DAG.getConstant(B.First, dl, VT));
2325 
2326  // Check range
2327  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2328  SDValue RangeCmp = DAG.getSetCC(
2329  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2330  Sub.getValueType()),
2331  Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
2332 
2333  // Determine the type of the test operands.
2334  bool UsePtrType = false;
2335  if (!TLI.isTypeLegal(VT))
2336  UsePtrType = true;
2337  else {
2338  for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
2339  if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
2340  // Switch table case ranges are encoded into a series of masks.
2341  // Just use pointer type, it's guaranteed to fit.
2342  UsePtrType = true;
2343  break;
2344  }
2345  }
2346  if (UsePtrType) {
2347  VT = TLI.getPointerTy(DAG.getDataLayout());
2348  Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2349  }
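  // Editorial note: each BitTestCase mask has one bit set per case value,
  // relative to B.First. For example, cases {1, 3, 8} with First == 1 would
  // be encoded as the mask 0b10000101.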
2350 
2351  B.RegVT = VT.getSimpleVT();
2352  B.Reg = FuncInfo.CreateReg(B.RegVT);
2353  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2354 
2355  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2356 
2357  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2358  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2359  SwitchBB->normalizeSuccProbs();
2360 
2361  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
2362  MVT::Other, CopyTo, RangeCmp,
2363  DAG.getBasicBlock(B.Default));
2364 
2365  // Avoid emitting unnecessary branches to the next block.
2366  if (MBB != NextBlock(SwitchBB))
2367  BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
2368  DAG.getBasicBlock(MBB));
2369 
2370  DAG.setRoot(BrRange);
2371 }
2372 
2373 /// visitBitTestCase - This function produces one "bit test" comparison and branch.
2374 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
2375  MachineBasicBlock* NextMBB,
2376  BranchProbability BranchProbToNext,
2377  unsigned Reg,
2378  BitTestCase &B,
2379  MachineBasicBlock *SwitchBB) {
2380  SDLoc dl = getCurSDLoc();
2381  MVT VT = BB.RegVT;
2382  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2383  SDValue Cmp;
2384  unsigned PopCount = countPopulation(B.Mask);
2385  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2386  if (PopCount == 1) {
2387  // Testing for a single bit; just compare the shift count with what it
2388  // would need to be to shift a 1 bit in that position.
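  // Editorial example: for B.Mask == 0x10 this emits (ShiftOp == 4), since
  // countTrailingZeros(0x10) == 4 and only that shift count can hit the bit.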
2389  Cmp = DAG.getSetCC(
2390  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2391  ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2392  ISD::SETEQ);
2393  } else if (PopCount == BB.Range) {
2394  // There is only one zero bit in the range, test for it directly.
2395  Cmp = DAG.getSetCC(
2396  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2397  ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2398  ISD::SETNE);
2399  } else {
2400  // Make desired shift
2401  SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2402  DAG.getConstant(1, dl, VT), ShiftOp);
2403 
2404  // Emit bit tests and jumps
2405  SDValue AndOp = DAG.getNode(ISD::AND, dl,
2406  VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2407  Cmp = DAG.getSetCC(
2408  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2409  AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
2410  }
2411 
2412  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2413  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2414  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2415  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2416  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
2417  // one as they are relative probabilities (and thus work more like weights),
2418  // so we need to normalize them so that they sum to one.
2419  SwitchBB->normalizeSuccProbs();
2420 
2421  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2422  MVT::Other, getControlRoot(),
2423  Cmp, DAG.getBasicBlock(B.TargetBB));
2424 
2425  // Avoid emitting unnecessary branches to the next block.
2426  if (NextMBB != NextBlock(SwitchBB))
2427  BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2428  DAG.getBasicBlock(NextMBB));
2429 
2430  DAG.setRoot(BrAnd);
2431 }
2432 
2433 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2434  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2435 
2436  // Retrieve successors. Look through artificial IR level blocks like
2437  // catchswitch for successors.
2438  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2439  const BasicBlock *EHPadBB = I.getSuccessor(1);
2440 
2441  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2442  // have to do anything here to lower funclet bundles.
2443  assert(!I.hasOperandBundlesOtherThan(
2444  {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2445  "Cannot lower invokes with arbitrary operand bundles yet!");
2446 
2447  const Value *Callee(I.getCalledValue());
2448  const Function *Fn = dyn_cast<Function>(Callee);
2449  if (isa<InlineAsm>(Callee))
2450  visitInlineAsm(&I);
2451  else if (Fn && Fn->isIntrinsic()) {
2452  switch (Fn->getIntrinsicID()) {
2453  default:
2454  llvm_unreachable("Cannot invoke this intrinsic");
2455  case Intrinsic::donothing:
2456  // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2457  break;
2458  case Intrinsic::experimental_patchpoint_void:
2459  case Intrinsic::experimental_patchpoint_i64:
2460  visitPatchpoint(&I, EHPadBB);
2461  break;
2462  case Intrinsic::experimental_gc_statepoint:
2463  LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2464  break;
2465  }
2466  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
2467  // Currently we do not lower any intrinsic calls with deopt operand bundles.
2468  // Eventually we will support lowering the @llvm.experimental.deoptimize
2469  // intrinsic, and right now there are no plans to support other intrinsics
2470  // with deopt state.
2471  LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2472  } else {
2473  LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2474  }
2475 
2476  // If the value of the invoke is used outside of its defining block, make it
2477  // available as a virtual register.
2478  // We already took care of the exported value for the statepoint instruction
2479  // during the call to LowerStatepoint.
2480  if (!isStatepoint(I)) {
2481  CopyToExportRegsIfNeeded(&I);
2482  }
2483 
2484  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2485  BranchProbabilityInfo *BPI = FuncInfo.BPI;
2486  BranchProbability EHPadBBProb =
2487  BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2488  : BranchProbability::getZero();
2489  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2490 
2491  // Update successor info.
2492  addSuccessorWithProb(InvokeMBB, Return);
2493  for (auto &UnwindDest : UnwindDests) {
2494  UnwindDest.first->setIsEHPad();
2495  addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2496  }
2497  InvokeMBB->normalizeSuccProbs();
2498 
2499  // Drop into normal successor.
2500  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2501  MVT::Other, getControlRoot(),
2502  DAG.getBasicBlock(Return)));
2503 }
2504 
2505 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2506  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2507 }
2508 
2509 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2510  assert(FuncInfo.MBB->isEHPad() &&
2511  "Call to landingpad not in landing pad!");
2512 
2513  MachineBasicBlock *MBB = FuncInfo.MBB;
2514  addLandingPadInfo(LP, *MBB);
2515 
2516  // If there aren't registers to copy the values into (e.g., during SjLj
2517  // exceptions), then don't bother to create these DAG nodes.
2518  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2519  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
2520  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2521  TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2522  return;
2523 
2524  // If landingpad's return type is token type, we don't create DAG nodes
2525  // for its exception pointer and selector value. The extraction of exception
2526  // pointer or selector value from token type landingpads is not currently
2527  // supported.
2528  if (LP.getType()->isTokenTy())
2529  return;
2530 
2531  SmallVector<EVT, 2> ValueVTs;
2532  SDLoc dl = getCurSDLoc();
2533  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2534  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2535 
2536  // Get the two live-in registers as SDValues. The physregs have already been
2537  // copied into virtual registers.
2538  SDValue Ops[2];
2539  if (FuncInfo.ExceptionPointerVirtReg) {
2540  Ops[0] = DAG.getZExtOrTrunc(
2541  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2542  FuncInfo.ExceptionPointerVirtReg,
2543  TLI.getPointerTy(DAG.getDataLayout())),
2544  dl, ValueVTs[0]);
2545  } else {
2546  Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2547  }
2548  Ops[1] = DAG.getZExtOrTrunc(
2549  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2550  FuncInfo.ExceptionSelectorVirtReg,
2551  TLI.getPointerTy(DAG.getDataLayout())),
2552  dl, ValueVTs[1]);
2553 
2554  // Merge into one.
2555  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2556  DAG.getVTList(ValueVTs), Ops);
2557  setValue(&LP, Res);
2558 }
2559 
2560 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2561 #ifndef NDEBUG
2562  for (const CaseCluster &CC : Clusters)
2563  assert(CC.Low == CC.High && "Input clusters must be single-case");
2564 #endif
2565 
2566  llvm::sort(Clusters.begin(), Clusters.end(),
2567  [](const CaseCluster &a, const CaseCluster &b) {
2568  return a.Low->getValue().slt(b.Low->getValue());
2569  });
2570 
2571  // Merge adjacent clusters with the same destination.
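  // Editorial example: the single-case clusters {1->BB0}, {2->BB0}, {4->BB1}
  // become {[1,2]->BB0} and {[4,4]->BB1}; a merged cluster's probability is
  // the sum of the probabilities of the cases it absorbs.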
2572  const unsigned N = Clusters.size();
2573  unsigned DstIndex = 0;
2574  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2575  CaseCluster &CC = Clusters[SrcIndex];
2576  const ConstantInt *CaseVal = CC.Low;
2577  MachineBasicBlock *Succ = CC.MBB;
2578 
2579  if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2580  (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2581  // If this case has the same successor and is a neighbour, merge it into
2582  // the previous cluster.
2583  Clusters[DstIndex - 1].High = CaseVal;
2584  Clusters[DstIndex - 1].Prob += CC.Prob;
2585  } else {
2586  std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2587  sizeof(Clusters[SrcIndex]));
2588  }
2589  }
2590  Clusters.resize(DstIndex);
2591 }
2592 
2593 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2594  MachineBasicBlock *Last) {
2595  // Update JTCases.
2596  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
2597  if (JTCases[i].first.HeaderBB == First)
2598  JTCases[i].first.HeaderBB = Last;
2599 
2600  // Update BitTestCases.
2601  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2602  if (BitTestCases[i].Parent == First)
2603  BitTestCases[i].Parent = Last;
2604 }
2605 
2606 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2607  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2608 
2609  // Update machine-CFG edges with unique successors.
2610  SmallSet<BasicBlock*, 32> Done;
2611  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
2612  BasicBlock *BB = I.getSuccessor(i);
2613  bool Inserted = Done.insert(BB).second;
2614  if (!Inserted)
2615  continue;
2616 
2617  MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2618  addSuccessorWithProb(IndirectBrMBB, Succ);
2619  }
2620  IndirectBrMBB->normalizeSuccProbs();
2621 
2622  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2623  MVT::Other, getControlRoot(),
2624  getValue(I.getAddress())));
2625 }
2626 
2627 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2628  if (DAG.getTarget().Options.TrapUnreachable)
2629  DAG.setRoot(
2630  DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2631 }
2632 
2633 void SelectionDAGBuilder::visitFSub(const User &I) {
2634  // -0.0 - X --> fneg
2635  Type *Ty = I.getType();
2636  if (isa<Constant>(I.getOperand(0)) &&
2637  I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
2638  SDValue Op2 = getValue(I.getOperand(1));
2639  setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2640  Op2.getValueType(), Op2));
2641  return;
2642  }
2643 
2644  visitBinary(I, ISD::FSUB);
2645 }
2646 
2647 /// Checks if the given instruction performs a vector reduction, in which case
2648 /// we have the freedom to alter the elements in the result as long as the
2649 /// reduction of them stays unchanged.
2650 static bool isVectorReductionOp(const User *I) {
2651  const Instruction *Inst = dyn_cast<Instruction>(I);
2652  if (!Inst || !Inst->getType()->isVectorTy())
2653  return false;
2654 
2655  auto OpCode = Inst->getOpcode();
2656  switch (OpCode) {
2657  case Instruction::Add:
2658  case Instruction::Mul:
2659  case Instruction::And:
2660  case Instruction::Or:
2661  case Instruction::Xor:
2662  break;
2663  case Instruction::FAdd:
2664  case Instruction::FMul:
2665  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2666  if (FPOp->getFastMathFlags().isFast())
2667  break;
2668  LLVM_FALLTHROUGH;
2669  default:
2670  return false;
2671  }
2672 
2673  unsigned ElemNum = Inst->getType()->getVectorNumElements();
2674  unsigned ElemNumToReduce = ElemNum;
2675 
2676  // Do DFS search on the def-use chain from the given instruction. We only
2677  // allow four kinds of operations during the search until we reach the
2678  // instruction that extracts the first element from the vector:
2679  //
2680  // 1. The reduction operation of the same opcode as the given instruction.
2681  //
2682  // 2. PHI node.
2683  //
2684  // 3. ShuffleVector instruction together with a reduction operation that
2685  // does a partial reduction.
2686  //
2687  // 4. ExtractElement that extracts the first element from the vector, and we
2688  // stop searching the def-use chain here.
2689  //
2690  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
2691  // from 1-3 to the stack to continue the DFS. The given instruction is not
2692  // a reduction operation if we meet any instructions other than those
2693  // listed above.
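// Editorial sketch (hypothetical IR, names assumed) of a pattern this
// predicate accepts for a <4 x i32> add reduction:
//   %r   = add <4 x i32> %a, %b                      ; candidate reduction op
//   %s   = shufflevector <4 x i32> %r, <4 x i32> undef,
//                        <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
//   %r2  = add <4 x i32> %r, %s                      ; reduce to 2 live lanes
//   %s2  = shufflevector <4 x i32> %r2, <4 x i32> undef,
//                        <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
//   %r3  = add <4 x i32> %r2, %s2                    ; reduce to 1 live lane
//   %red = extractelement <4 x i32> %r3, i32 0       ; search stops here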
2694 
2695  SmallVector<const User *, 16> UsersToVisit{Inst};
2696  SmallPtrSet<const User *, 16> Visited;
2697  bool ReduxExtracted = false;
2698 
2699  while (!UsersToVisit.empty()) {
2700  auto User = UsersToVisit.back();
2701  UsersToVisit.pop_back();
2702  if (!Visited.insert(User).second)
2703  continue;
2704 
2705  for (const auto &U : User->users()) {
2706  auto Inst = dyn_cast<Instruction>(U);
2707  if (!Inst)
2708  return false;
2709 
2710  if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
2711  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2712  if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
2713  return false;
2714  UsersToVisit.push_back(U);
2715  } else if (const ShuffleVectorInst *ShufInst =
2716  dyn_cast<ShuffleVectorInst>(U)) {
2717  // Detect the following pattern: A ShuffleVector instruction together
2718  // with a reduction operation that does a partial reduction on the first and
2719  // second ElemNumToReduce / 2 elements, storing the result into the first
2720  // ElemNumToReduce / 2 elements of another vector.
2721 
2722  unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
2723  if (ResultElements < ElemNum)
2724  return false;
2725 
2726  if (ElemNumToReduce == 1)
2727  return false;
2728  if (!isa<UndefValue>(U->getOperand(1)))
2729  return false;
2730  for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
2731  if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
2732  return false;
2733  for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
2734  if (ShufInst->getMaskValue(i) != -1)
2735  return false;
2736 
2737  // There is only one user of this ShuffleVector instruction, which
2738  // must be a reduction operation.
2739  if (!U->hasOneUse())
2740  return false;
2741 
2742  auto U2 = dyn_cast<Instruction>(*U->user_begin());
2743  if (!U2 || U2->getOpcode() != OpCode)
2744  return false;
2745 
2746  // Check operands of the reduction operation.
2747  if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
2748  (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
2749  UsersToVisit.push_back(U2);
2750  ElemNumToReduce /= 2;
2751  } else
2752  return false;
2753  } else if (isa<ExtractElementInst>(U)) {
2754  // At this moment we should have reduced all elements in the vector.
2755  if (ElemNumToReduce != 1)
2756  return false;
2757 
2758  const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
2759  if (!Val || Val->getZExtValue() != 0)
2760  return false;
2761 
2762  ReduxExtracted = true;
2763  } else
2764  return false;
2765  }
2766  }
2767  return ReduxExtracted;
2768 }
2769 
2770 void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
2771  SDNodeFlags Flags;
2772  if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
2773  Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
2774  Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
2775  }
2776  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
2777  Flags.setExact(ExactOp->isExact());
2778  }
2779  if (isVectorReductionOp(&I)) {
2780  Flags.setVectorReduction(true);
2781  LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
2782  }
2783 
2784  SDValue Op1 = getValue(I.getOperand(0));
2785  SDValue Op2 = getValue(I.getOperand(1));
2786  SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
2787  Op1, Op2, Flags);
2788  setValue(&I, BinNodeValue);
2789 }
2790 
2791 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2792  SDValue Op1 = getValue(I.getOperand(0));
2793  SDValue Op2 = getValue(I.getOperand(1));
2794 
2795  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2796  Op2.getValueType(), DAG.getDataLayout());
2797 
2798  // Coerce the shift amount to the right type if we can.
2799  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2800  unsigned ShiftSize = ShiftTy.getSizeInBits();
2801  unsigned Op2Size = Op2.getValueSizeInBits();
2802  SDLoc DL = getCurSDLoc();
2803 
2804  // If the operand is smaller than the shift count type, promote it.
2805  if (ShiftSize > Op2Size)
2806  Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2807 
2808  // If the operand is larger than the shift count type but the shift
2809  // count type has enough bits to represent any shift value, truncate
2810  // it now. This is a common case and it exposes the truncate to
2811  // optimization early.
2812  else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
2813  Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2814  // Otherwise we'll need to temporarily settle for some other convenient
2815  // type. Type legalization will make adjustments once the shiftee is split.
2816  else
2817  Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2818  }
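  // Editorial example (assuming a 32-bit shift-amount type): an i8 amount is
  // zero-extended to i32, while an i128 amount is truncated to i32, which can
  // still represent every in-range shift count.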
2819 
2820  bool nuw = false;
2821  bool nsw = false;
2822  bool exact = false;
2823 
2824  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
2825 
2826  if (const OverflowingBinaryOperator *OFBinOp =
2827  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2828  nuw = OFBinOp->hasNoUnsignedWrap();
2829  nsw = OFBinOp->hasNoSignedWrap();
2830  }
2831  if (const PossiblyExactOperator *ExactOp =
2832  dyn_cast<const PossiblyExactOperator>(&I))
2833  exact = ExactOp->isExact();
2834  }
2835  SDNodeFlags Flags;
2836  Flags.setExact(exact);
2837  Flags.setNoSignedWrap(nsw);
2838  Flags.setNoUnsignedWrap(nuw);
2839  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2840  Flags);
2841  setValue(&I, Res);
2842 }
2843 
2844 void SelectionDAGBuilder::visitSDiv(const User &I) {
2845  SDValue Op1 = getValue(I.getOperand(0));
2846  SDValue Op2 = getValue(I.getOperand(1));
2847 
2848  SDNodeFlags Flags;
2849  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2850  cast<PossiblyExactOperator>(&I)->isExact());
2851  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2852  Op2, Flags));
2853 }
2854 
2855 void SelectionDAGBuilder::visitICmp(const User &I) {
2856  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2857  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2858  predicate = IC->getPredicate();
2859  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2860  predicate = ICmpInst::Predicate(IC->getPredicate());
2861  SDValue Op1 = getValue(I.getOperand(0));
2862  SDValue Op2 = getValue(I.getOperand(1));
2863  ISD::CondCode Opcode = getICmpCondCode(predicate);
2864 
2865  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2866  I.getType());
2867  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2868 }
2869 
2870 void SelectionDAGBuilder::visitFCmp(const User &I) {
2871  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2872  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2873  predicate = FC->getPredicate();
2874  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2875  predicate = FCmpInst::Predicate(FC->getPredicate());
2876  SDValue Op1 = getValue(I.getOperand(0));
2877  SDValue Op2 = getValue(I.getOperand(1));
2878 
2879  ISD::CondCode Condition = getFCmpCondCode(predicate);
2880  auto *FPMO = dyn_cast<FPMathOperator>(&I);
2881  if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
2882  Condition = getFCmpCodeWithoutNaN(Condition);
2883 
2884  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2885  I.getType());
2886  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2887 }
2888 
2889 // Check if the condition of the select is used only by selects, so that the
2890 // comparison can be folded away into a min/max node.
2891 static bool hasOnlySelectUsers(const Value *Cond) {
2892  return llvm::all_of(Cond->users(), [](const Value *V) {
2893  return isa<SelectInst>(V);
2894  });
2895 }
2896 
2897 void SelectionDAGBuilder::visitSelect(const User &I) {
2898  SmallVector<EVT, 4> ValueVTs;
2899  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
2900  ValueVTs);
2901  unsigned NumValues = ValueVTs.size();
2902  if (NumValues == 0) return;
2903 
2904  SmallVector<SDValue, 4> Values(NumValues);
2905  SDValue Cond = getValue(I.getOperand(0));
2906  SDValue LHSVal = getValue(I.getOperand(1));
2907  SDValue RHSVal = getValue(I.getOperand(2));
2908  auto BaseOps = {Cond};
2909  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2910  ISD::VSELECT : ISD::SELECT;
2911 
2912  // Min/max matching is only viable if all output VTs are the same.
2913  if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
2914  EVT VT = ValueVTs[0];
2915  LLVMContext &Ctx = *DAG.getContext();
2916  auto &TLI = DAG.getTargetLoweringInfo();
2917 
2918  // We care about the legality of the operation after it has been type
2919  // legalized.
2920  while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2921  VT != TLI.getTypeToTransformTo(Ctx, VT))
2922  VT = TLI.getTypeToTransformTo(Ctx, VT);
2923 
2924  // If the vselect is legal, assume we want to leave this as a vector setcc +
2925  // vselect. Otherwise, if this is going to be scalarized, we want to see if
2926  // min/max is legal on the scalar type.
2927  bool UseScalarMinMax = VT.isVector() &&
2928  !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
2929 
2930  Value *LHS, *RHS;
2931  auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
2932  ISD::NodeType Opc = ISD::DELETED_NODE;
2933  switch (SPR.Flavor) {
2934  case SPF_UMAX: Opc = ISD::UMAX; break;
2935  case SPF_UMIN: Opc = ISD::UMIN; break;
2936  case SPF_SMAX: Opc = ISD::SMAX; break;
2937  case SPF_SMIN: Opc = ISD::SMIN; break;
2938  case SPF_FMINNUM:
2939  switch (SPR.NaNBehavior) {
2940  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2941  case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
2942  case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2943  case SPNB_RETURNS_ANY: {
2944  if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
2945  Opc = ISD::FMINNUM;
2946  else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
2947  Opc = ISD::FMINNAN;
2948  else if (UseScalarMinMax)
2949  Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
2950  ISD::FMINNUM : ISD::FMINNAN;
2951  break;
2952  }
2953  }
2954  break;
2955  case SPF_FMAXNUM:
2956  switch (SPR.NaNBehavior) {
2957  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2958  case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
2959  case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
2960  case SPNB_RETURNS_ANY:
2961 
2962  if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
2963  Opc = ISD::FMAXNUM;
2964  else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
2965  Opc = ISD::FMAXNAN;
2966  else if (UseScalarMinMax)
2967  Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
2968  ISD::FMAXNUM : ISD::FMAXNAN;
2969  break;
2970  }
2971  break;
2972  default: break;
2973  }
2974 
2975  if (Opc != ISD::DELETED_NODE &&
2976  (TLI.isOperationLegalOrCustom(Opc, VT) ||
2977  (UseScalarMinMax &&
2978  TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
2979  // If the underlying comparison instruction is used by any other
2980  // instruction, the consumed instructions won't be destroyed, so it is
2981  // not profitable to convert to a min/max.
2982  hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
2983  OpCode = Opc;
2984  LHSVal = getValue(LHS);
2985  RHSVal = getValue(RHS);
2986  BaseOps = {};
2987  }
2988  }
2989 
2990  for (unsigned i = 0; i != NumValues; ++i) {
2991  SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
2992  Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
2993  Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
2994  Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
2995  LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
2996  Ops);
2997  }
2998 
2999  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3000  DAG.getVTList(ValueVTs), Values));
3001 }
3002 
3003 void SelectionDAGBuilder::visitTrunc(const User &I) {
3004  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
3005  SDValue N = getValue(I.getOperand(0));
3006  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3007  I.getType());
3008  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
3009 }
3010 
3011 void SelectionDAGBuilder::visitZExt(const User &I) {
3012  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3013  // ZExt also can't be a cast to bool for the same reason, so there is nothing to do.
3014  SDValue N = getValue(I.getOperand(0));
3015  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3016  I.getType());
3017  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
3018 }
3019 
3020 void SelectionDAGBuilder::visitSExt(const User &I) {
3021  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3022  // SExt also can't be a cast to bool for the same reason, so there is nothing to do.
3023  SDValue N = getValue(I.getOperand(0));
3024  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3025  I.getType());
3026  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
3027 }
3028 
3029 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
3030  // FPTrunc is never a no-op cast, no need to check
3031  SDValue N = getValue(I.getOperand(0));
3032  SDLoc dl = getCurSDLoc();
3033  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3034  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3035  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
3036  DAG.getTargetConstant(
3037  0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
3038 }
3039 
3040 void SelectionDAGBuilder::visitFPExt(const User &I) {
3041  // FPExt is never a no-op cast, no need to check
3042  SDValue N = getValue(I.getOperand(0));
3043  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3044  I.getType());
3045  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
3046 }
3047 
3048 void SelectionDAGBuilder::visitFPToUI(const User &I) {
3049  // FPToUI is never a no-op cast, no need to check
3050  SDValue N = getValue(I.getOperand(0));
3051  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3052  I.getType());
3053  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3054 }
3055 
3056 void SelectionDAGBuilder::visitFPToSI(const User &I) {
3057  // FPToSI is never a no-op cast, no need to check
3058  SDValue N = getValue(I.getOperand(0));
3059  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3060  I.getType());
3061  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3062 }
3063 
3064 void SelectionDAGBuilder::visitUIToFP(const User &I) {
3065  // UIToFP is never a no-op cast, no need to check
3066  SDValue N = getValue(I.getOperand(0));
3067  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3068  I.getType());
3069  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3070 }
3071 
3072 void SelectionDAGBuilder::visitSIToFP(const User &I) {
3073  // SIToFP is never a no-op cast, no need to check
3074  SDValue N = getValue(I.getOperand(0));
3075  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3076  I.getType());
3077  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3078 }
3079 
3080 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3081  // What to do depends on the size of the integer and the size of the pointer.
3082  // We can either truncate, zero extend, or no-op, accordingly.
3083  SDValue N = getValue(I.getOperand(0));
3084  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3085  I.getType());
3086  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3087 }
3088 
3089 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3090  // What to do depends on the size of the integer and the size of the pointer.
3091  // We can either truncate, zero extend, or no-op, accordingly.
3092  SDValue N = getValue(I.getOperand(0));
3093  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3094  I.getType());
3095  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3096 }
3097 
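// Illustrative example (editorial sketch, assuming 64-bit pointers): a
// ptrtoint to i32 truncates, to i128 zero-extends, and to i64 is a no-op,
// because getZExtOrTrunc picks the operation from the relative sizes:
//
//   SDValue V = DAG.getZExtOrTrunc(N, dl, MVT::i32); // i64 -> ISD::TRUNCATE
//
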
3098 void SelectionDAGBuilder::visitBitCast(const User &I) {
3099  SDValue N = getValue(I.getOperand(0));
3100  SDLoc dl = getCurSDLoc();
3101  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3102  I.getType());
3103 
3104  // BitCast assures us that source and destination are the same size so this is
3105  // either a BITCAST or a no-op.
3106  if (DestVT != N.getValueType())
3107  setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3108  DestVT, N)); // convert types.
3109  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3110  // might fold any kind of constant expression to an integer constant and that
3111  // is not what we are looking for. Only recognize a bitcast of a genuine
3112  // constant integer as an opaque constant.
3113  else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3114  setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3115  /*isOpaque*/true));
3116  else
3117  setValue(&I, N); // noop cast.
3118 }
3119 
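// Illustrative example (not from the original source): `bitcast i32
// 1078530011 to float` hits the ConstantInt path and keeps 0x40490fdb (the
// bits of 3.14159f) as an opaque constant, while `bitcast <2 x i16> %v to
// i32` becomes a plain ISD::BITCAST node because its operand is not a genuine
// constant integer.
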
3120 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3121  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3122  const Value *SV = I.getOperand(0);
3123  SDValue N = getValue(SV);
3124  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3125 
3126  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3127  unsigned DestAS = I.getType()->getPointerAddressSpace();
3128 
3129  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3130  N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3131 
3132  setValue(&I, N);
3133 }
3134 
3135 void SelectionDAGBuilder::visitInsertElement(const User &I) {
3136  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3137  SDValue InVec = getValue(I.getOperand(0));
3138  SDValue InVal = getValue(I.getOperand(1));
3139  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3140  TLI.getVectorIdxTy(DAG.getDataLayout()));
3141  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3142  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3143  InVec, InVal, InIdx));
3144 }
3145 
3146 void SelectionDAGBuilder::visitExtractElement(const User &I) {
3147  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3148  SDValue InVec = getValue(I.getOperand(0));
3149  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3150  TLI.getVectorIdxTy(DAG.getDataLayout()));
3151  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3152  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3153  InVec, InIdx));
3154 }
3155 
3156 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3157  SDValue Src1 = getValue(I.getOperand(0));
3158  SDValue Src2 = getValue(I.getOperand(1));
3159  SDLoc DL = getCurSDLoc();
3160 
3161  SmallVector<int, 8> Mask;
3162  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3163  unsigned MaskNumElts = Mask.size();
3164 
3165  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3166  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3167  EVT SrcVT = Src1.getValueType();
3168  unsigned SrcNumElts = SrcVT.getVectorNumElements();
3169 
3170  if (SrcNumElts == MaskNumElts) {
3171  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3172  return;
3173  }
3174 
3175  // Normalize the shuffle vector since mask and vector length don't match.
3176  if (SrcNumElts < MaskNumElts) {
3177  // Mask is longer than the source vectors. We can use concatenate vector to
3178  // make the mask and vectors lengths match.
3179 
3180  if (MaskNumElts % SrcNumElts == 0) {
3181  // Mask length is a multiple of the source vector length.
3182  // Check if the shuffle is some kind of concatenation of the input
3183  // vectors.
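// Illustrative example (editorial): with SrcNumElts = 4 and MaskNumElts = 8,
// the mask <0,1,2,3,4,5,6,7> concatenates Src1 and Src2 and <4,5,6,7,4,5,6,7>
// concatenates Src2 with itself, while <1,0,3,2,5,4,7,6> fails the check
// because the indices within each 4-element piece are not sequential.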
3184  unsigned NumConcat = MaskNumElts / SrcNumElts;
3185  bool IsConcat = true;
3186  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3187  for (unsigned i = 0; i != MaskNumElts; ++i) {
3188  int Idx = Mask[i];
3189  if (Idx < 0)
3190  continue;
3191  // Ensure the indices in each SrcVT sized piece are sequential and that
3192  // the same source is used for the whole piece.
3193  if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3194  (ConcatSrcs[i / SrcNumElts] >= 0 &&
3195  ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3196  IsConcat = false;
3197  break;
3198  }
3199  // Remember which source this index came from.
3200  ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3201  }
3202 
3203  // The shuffle is concatenating multiple vectors together. Just emit
3204  // a CONCAT_VECTORS operation.
3205  if (IsConcat) {
3206  SmallVector<SDValue, 8> ConcatOps;
3207  for (auto Src : ConcatSrcs) {
3208  if (Src < 0)
3209  ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3210  else if (Src == 0)
3211  ConcatOps.push_back(Src1);
3212  else
3213  ConcatOps.push_back(Src2);
3214  }
3215  setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3216  return;
3217  }
3218  }
3219 
3220  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3221  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3222  EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3223  PaddedMaskNumElts);
3224 
3225  // Pad both vectors with undefs to make them the same length as the mask.
3226  SDValue UndefVal = DAG.getUNDEF(SrcVT);
3227 
3228  SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3229  SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3230  MOps1[0] = Src1;
3231  MOps2[0] = Src2;
3232 
3233  Src1 = Src1.isUndef()
3234  ? DAG.getUNDEF(PaddedVT)
3235  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3236  Src2 = Src2.isUndef()
3237  ? DAG.getUNDEF(PaddedVT)
3238  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3239 
3240  // Readjust mask for new input vector length.
3241  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3242  for (unsigned i = 0; i != MaskNumElts; ++i) {
3243  int Idx = Mask[i];
3244  if (Idx >= (int)SrcNumElts)
3245  Idx -= SrcNumElts - PaddedMaskNumElts;
3246  MappedOps[i] = Idx;
3247  }
3248 
3249  SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3250 
3251  // If the concatenated vector was padded, extract a subvector with the
3252  // correct number of elements.
3253  if (MaskNumElts != PaddedMaskNumElts)
3254  Result = DAG.getNode(
3255  ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3256  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
3257 
3258  setValue(&I, Result);
3259  return;
3260  }
3261 
3262  if (SrcNumElts > MaskNumElts) {
3263  // Analyze the access pattern of the vector to see if we can extract
3264  // two subvectors and do the shuffle.
3265  int StartIdx[2] = { -1, -1 }; // Start indices of the subvectors to extract from.
3266  bool CanExtract = true;
3267  for (int Idx : Mask) {
3268  unsigned Input = 0;
3269  if (Idx < 0)
3270  continue;
3271 
3272  if (Idx >= (int)SrcNumElts) {
3273  Input = 1;
3274  Idx -= SrcNumElts;
3275  }
3276 
3277  // If all the indices come from the same MaskNumElts sized portion of
3278  // the sources we can use extract. Also make sure the extract wouldn't
3279  // extract past the end of the source.
3280  int NewStartIdx = alignDown(Idx, MaskNumElts);
3281  if (NewStartIdx + MaskNumElts > SrcNumElts ||
3282  (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3283  CanExtract = false;
3284  // Make sure we always update StartIdx as we use it to track if all
3285  // elements are undef.
3286  StartIdx[Input] = NewStartIdx;
3287  }
3288 
3289  if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3290  setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3291  return;
3292  }
3293  if (CanExtract) {
3294  // Extract appropriate subvector and generate a vector shuffle
3295  for (unsigned Input = 0; Input < 2; ++Input) {
3296  SDValue &Src = Input == 0 ? Src1 : Src2;
3297  if (StartIdx[Input] < 0)
3298  Src = DAG.getUNDEF(VT);
3299  else {
3300  Src = DAG.getNode(
3301  ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3302  DAG.getConstant(StartIdx[Input], DL,
3303  TLI.getVectorIdxTy(DAG.getDataLayout())));
3304  }
3305  }
3306 
3307  // Calculate new mask.
3308  SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3309  for (int &Idx : MappedOps) {
3310  if (Idx >= (int)SrcNumElts)
3311  Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3312  else if (Idx >= 0)
3313  Idx -= StartIdx[0];
3314  }
3315 
3316  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3317  return;
3318  }
3319  }
3320 
3321  // We can't use either concat vectors or extract subvectors so fall back to
3322  // replacing the shuffle with a chain of extracts and a build vector.
3324  EVT EltVT = VT.getVectorElementType();
3325  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
3326  SmallVector<SDValue, 8> Ops;
3327  for (int Idx : Mask) {
3328  SDValue Res;
3329 
3330  if (Idx < 0) {
3331  Res = DAG.getUNDEF(EltVT);
3332  } else {
3333  SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3334  if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3335 
3336  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3337  EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
3338  }
3339 
3340  Ops.push_back(Res);
3341  }
3342 
3343  setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3344 }
3345 
3346 void SelectionDAGBuilder::visitInsertValue(const User &I) {
3347  ArrayRef<unsigned> Indices;
3348  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3349  Indices = IV->getIndices();
3350  else
3351  Indices = cast<ConstantExpr>(&I)->getIndices();
3352 
3353  const Value *Op0 = I.getOperand(0);
3354  const Value *Op1 = I.getOperand(1);
3355  Type *AggTy = I.getType();
3356  Type *ValTy = Op1->getType();
3357  bool IntoUndef = isa<UndefValue>(Op0);
3358  bool FromUndef = isa<UndefValue>(Op1);
3359 
3360  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3361 
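// Illustrative example (editorial): for an aggregate of type {i32, {float,
// i64}} and indices {1, 1}, ComputeLinearIndex returns 2, since the i64
// member is the third scalar in the flattened sequence i32, float, i64.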
3362  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3363  SmallVector<EVT, 4> AggValueVTs;
3364  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3365  SmallVector<EVT, 4> ValValueVTs;
3366  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3367 
3368  unsigned NumAggValues = AggValueVTs.size();
3369  unsigned NumValValues = ValValueVTs.size();
3370  SmallVector<SDValue, 4> Values(NumAggValues);
3371 
3372  // Ignore an insertvalue that produces an empty object
3373  if (!NumAggValues) {
3374  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3375  return;
3376  }
3377 
3378  SDValue Agg = getValue(Op0);
3379  unsigned i = 0;
3380  // Copy the beginning value(s) from the original aggregate.
3381  for (; i != LinearIndex; ++i)
3382  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3383  SDValue(Agg.getNode(), Agg.getResNo() + i);
3384  // Copy values from the inserted value(s).
3385  if (NumValValues) {
3386  SDValue Val = getValue(Op1);
3387  for (; i != LinearIndex + NumValValues; ++i)
3388  Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3389  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3390  }
3391  // Copy remaining value(s) from the original aggregate.
3392  for (; i != NumAggValues; ++i)
3393  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3394  SDValue(Agg.getNode(), Agg.getResNo() + i);
3395 
3396  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3397  DAG.getVTList(AggValueVTs), Values));
3398 }
3399 
3400 void SelectionDAGBuilder::visitExtractValue(const User &I) {
3401  ArrayRef<unsigned> Indices;
3402  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3403  Indices = EV->getIndices();
3404  else
3405  Indices = cast<ConstantExpr>(&I)->getIndices();
3406 
3407  const Value *Op0 = I.getOperand(0);
3408  Type *AggTy = Op0->getType();
3409  Type *ValTy = I.getType();
3410  bool OutOfUndef = isa<UndefValue>(Op0);
3411 
3412  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3413 
3414  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3415  SmallVector<EVT, 4> ValValueVTs;
3416  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3417 
3418  unsigned NumValValues = ValValueVTs.size();
3419 
3420  // Ignore an extractvalue that produces an empty object
3421  if (!NumValValues) {
3422  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3423  return;
3424  }
3425 
3426  SmallVector<SDValue, 4> Values(NumValValues);
3427 
3428  SDValue Agg = getValue(Op0);
3429  // Copy out the selected value(s).
3430  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3431  Values[i - LinearIndex] =
3432  OutOfUndef ?
3433  DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3434  SDValue(Agg.getNode(), Agg.getResNo() + i);
3435 
3436  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3437  DAG.getVTList(ValValueVTs), Values));
3438 }
3439 
3440 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3441  Value *Op0 = I.getOperand(0);
3442  // Note that the pointer operand may be a vector of pointers. Take the scalar
3443  // element which holds a pointer.
3444  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3445  SDValue N = getValue(Op0);
3446  SDLoc dl = getCurSDLoc();
3447 
3448  // Normalize Vector GEP - all scalar operands should be converted to the
3449  // splat vector.
3450  unsigned VectorWidth = I.getType()->isVectorTy() ?
3451  cast<VectorType>(I.getType())->getVectorNumElements() : 0;
3452 
3453  if (VectorWidth && !N.getValueType().isVector()) {
3454  LLVMContext &Context = *DAG.getContext();
3455  EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
3456  N = DAG.getSplatBuildVector(VT, dl, N);
3457  }
3458 
3459  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3460  GTI != E; ++GTI) {
3461  const Value *Idx = GTI.getOperand();
3462  if (StructType *StTy = GTI.getStructTypeOrNull()) {
3463  unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3464  if (Field) {
3465  // N = N + Offset
3466  uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3467 
3468  // In an inbounds GEP with an offset that is nonnegative even when
3469  // interpreted as signed, assume there is no unsigned overflow.
3470  SDNodeFlags Flags;
3471  if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
3472  Flags.setNoUnsignedWrap(true);
3473 
3474  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3475  DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3476  }
3477  } else {
3478  unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
3479  MVT IdxTy = MVT::getIntegerVT(IdxSize);
3480  APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
3481 
3482  // If this is a scalar constant or a splat vector of constants,
3483  // handle it quickly.
3484  const auto *CI = dyn_cast<ConstantInt>(Idx);
3485  if (!CI && isa<ConstantDataVector>(Idx) &&
3486  cast<ConstantDataVector>(Idx)->getSplatValue())
3487  CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3488 
3489  if (CI) {
3490  if (CI->isZero())
3491  continue;
3492  APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
3493  LLVMContext &Context = *DAG.getContext();
3494  SDValue OffsVal = VectorWidth ?
3495  DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
3496  DAG.getConstant(Offs, dl, IdxTy);
3497 
3498  // In an inbounds GEP with an offset that is nonnegative even when
3499  // interpreted as signed, assume there is no unsigned overflow.
3500  SDNodeFlags Flags;
3501  if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
3502  Flags.setNoUnsignedWrap(true);
3503 
3504  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3505  continue;
3506  }
3507 
3508  // N = N + Idx * ElementSize;
3509  SDValue IdxN = getValue(Idx);
3510 
3511  if (!IdxN.getValueType().isVector() && VectorWidth) {
3512  EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
3513  IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3514  }
3515 
3516  // If the index is smaller or larger than intptr_t, truncate or extend
3517  // it.
3518  IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3519 
3520  // If this is a multiply by a power of two, turn it into a shl
3521  // immediately. This is a very common case.
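// Illustrative example (editorial): with ElementSize == 8 the index is
// lowered as (IdxN << 3) instead of (IdxN * 8).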
3522  if (ElementSize != 1) {
3523  if (ElementSize.isPowerOf2()) {
3524  unsigned Amt = ElementSize.logBase2();
3525  IdxN = DAG.getNode(ISD::SHL, dl,
3526  N.getValueType(), IdxN,
3527  DAG.getConstant(Amt, dl, IdxN.getValueType()));
3528  } else {
3529  SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3530  IdxN = DAG.getNode(ISD::MUL, dl,
3531  N.getValueType(), IdxN, Scale);
3532  }
3533  }
3534 
3535  N = DAG.getNode(ISD::ADD, dl,
3536  N.getValueType(), N, IdxN);
3537  }
3538  }
3539 
3540  setValue(&I, N);
3541 }
3542 
3543 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3544  // If this is a fixed sized alloca in the entry block of the function,
3545  // allocate it statically on the stack.
3546  if (FuncInfo.StaticAllocaMap.count(&I))
3547  return; // getValue will auto-populate this.
3548 
3549  SDLoc dl = getCurSDLoc();
3550  Type *Ty = I.getAllocatedType();
3551  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3552  auto &DL = DAG.getDataLayout();
3553  uint64_t TySize = DL.getTypeAllocSize(Ty);
3554  unsigned Align =
3555  std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3556 
3557  SDValue AllocSize = getValue(I.getArraySize());
3558 
3559  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
3560  if (AllocSize.getValueType() != IntPtr)
3561  AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3562 
3563  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3564  AllocSize,
3565  DAG.getConstant(TySize, dl, IntPtr));
3566 
3567  // Handle alignment. If the requested alignment is less than or equal to
3568  // the stack alignment, ignore it. If the size is greater than or equal to
3569  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
3570  unsigned StackAlign =
3571  DAG.getSubtarget().getFrameLowering()->getStackAlignment();
3572  if (Align <= StackAlign)
3573  Align = 0;
3574 
3575  // Round the size of the allocation up to the stack alignment size
3576  // by adding SA-1 to the size. This doesn't overflow because we're computing
3577  // an address inside an alloca.
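// Illustrative example (editorial): with StackAlign == 16 and AllocSize == 40,
// the ADD and AND below compute (40 + 15) & ~15 == 48, the next multiple of 16.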
3578  SDNodeFlags Flags;
3579  Flags.setNoUnsignedWrap(true);
3580  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
3581  DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
3582 
3583  // Mask out the low bits for alignment purposes.
3584  AllocSize =
3585  DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
3586  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
3587 
3588  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
3589  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3590  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3591  setValue(&I, DSA);
3592  DAG.setRoot(DSA.getValue(1));
3593 
3594  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
3595 }
3596 
3597 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3598  if (I.isAtomic())
3599  return visitAtomicLoad(I);
3600 
3601  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3602  const Value *SV = I.getOperand(0);
3603  if (TLI.supportSwiftError()) {
3604  // Swifterror values can come from either a function parameter with
3605  // swifterror attribute or an alloca with swifterror attribute.
3606  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
3607  if (Arg->hasSwiftErrorAttr())
3608  return visitLoadFromSwiftError(I);
3609  }
3610 
3611  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
3612  if (Alloca->isSwiftError())
3613  return visitLoadFromSwiftError(I);
3614  }
3615  }
3616 
3617  SDValue Ptr = getValue(SV);
3618 
3619  Type *Ty = I.getType();
3620 
3621  bool isVolatile = I.isVolatile();
3622  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3623  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
3624  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
3625  unsigned Alignment = I.getAlignment();
3626 
3627  AAMDNodes AAInfo;
3628  I.getAAMetadata(AAInfo);
3629  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3630 
3631  SmallVector<EVT, 4> ValueVTs;
3632  SmallVector<uint64_t, 4> Offsets;
3633  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3634  unsigned NumValues = ValueVTs.size();
3635  if (NumValues == 0)
3636  return;
3637 
3638  SDValue Root;
3639  bool ConstantMemory = false;
3640  if (isVolatile || NumValues > MaxParallelChains)
3641  // Serialize volatile loads with other side effects.
3642  Root = getRoot();
3643  else if (AA && AA->pointsToConstantMemory(MemoryLocation(
3644  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3645  // Do not serialize (non-volatile) loads of constant memory with anything.
3646  Root = DAG.getEntryNode();
3647  ConstantMemory = true;
3648  } else {
3649  // Do not serialize non-volatile loads against each other.
3650  Root = DAG.getRoot();
3651  }
3652 
3653  SDLoc dl = getCurSDLoc();
3654 
3655  if (isVolatile)
3656  Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3657 
3658  // An aggregate load cannot wrap around the address space, so offsets to its
3659  // parts don't wrap either.
3660  SDNodeFlags Flags;
3661  Flags.setNoUnsignedWrap(true);
3662 
3663  SmallVector<SDValue, 4> Values(NumValues);
3664  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3665  EVT PtrVT = Ptr.getValueType();
3666  unsigned ChainI = 0;
3667  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3668  // Serializing loads here may result in excessive register pressure, and
3669  // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3670  // could recover a bit by hoisting nodes upward in the chain by recognizing
3671  // they are side-effect free or do not alias. The optimizer should really
3672  // avoid this case by converting large object/array copies to llvm.memcpy
3673  // (MaxParallelChains should always remain as failsafe).
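// Illustrative example (editorial): if MaxParallelChains were 64, a 100-part
// aggregate load would TokenFactor the first 64 chains into a new Root and
// thread the remaining 36 loads off that node.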
3674  if (ChainI == MaxParallelChains) {
3675  assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3676  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3677  makeArrayRef(Chains.data(), ChainI));
3678  Root = Chain;
3679  ChainI = 0;
3680  }
3681  SDValue A = DAG.getNode(ISD::ADD, dl,
3682  PtrVT, Ptr,
3683  DAG.getConstant(Offsets[i], dl, PtrVT),
3684  Flags);
3685  auto MMOFlags = MachineMemOperand::MONone;
3686  if (isVolatile)
3687  MMOFlags |= MachineMemOperand::MOVolatile;
3688  if (isNonTemporal)
3689  MMOFlags |= MachineMemOperand::MONonTemporal;
3690  if (isInvariant)
3691  MMOFlags |= MachineMemOperand::MOInvariant;
3692  if (isDereferenceable)
3693  MMOFlags |= MachineMemOperand::MODereferenceable;
3694  MMOFlags |= TLI.getMMOFlags(I);
3695 
3696  SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
3697  MachinePointerInfo(SV, Offsets[i]), Alignment,
3698  MMOFlags, AAInfo, Ranges);
3699 
3700  Values[i] = L;
3701  Chains[ChainI] = L.getValue(1);
3702  }
3703 
3704  if (!ConstantMemory) {
3705  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3706  makeArrayRef(Chains.data(), ChainI));
3707  if (isVolatile)
3708  DAG.setRoot(Chain);
3709  else
3710  PendingLoads.push_back(Chain);
3711  }
3712 
3713  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3714  DAG.getVTList(ValueVTs), Values));
3715 }
3716 
3717 void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
3718  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
3719  "call visitStoreToSwiftError when backend supports swifterror");
3720 
3721  SmallVector<EVT, 4> ValueVTs;
3722  SmallVector<uint64_t, 4> Offsets;
3723  const Value *SrcV = I.getOperand(0);
3724  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3725  SrcV->getType(), ValueVTs, &Offsets);
3726  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3727  "expect a single EVT for swifterror");
3728 
3729  SDValue Src = getValue(SrcV);
3730  // Create a virtual register, then update the virtual register.
3731  unsigned VReg; bool CreatedVReg;
3732  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
3733  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
3734  // Chain can be getRoot or getControlRoot.
3735  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
3736  SDValue(Src.getNode(), Src.getResNo()));
3737  DAG.setRoot(CopyNode);
3738  if (CreatedVReg)
3739  FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
3740 }
3741 
3742 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
3743  assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
3744  "call visitLoadFromSwiftError when backend supports swifterror");
3745 
3746  assert(!I.isVolatile() &&
3747  I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
3748  I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
3749  "Support volatile, non temporal, invariant for load_from_swift_error");
3750 
3751  const Value *SV = I.getOperand(0);
3752  Type *Ty = I.getType();
3753  AAMDNodes AAInfo;
3754  I.getAAMetadata(AAInfo);
3755  assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
3756  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
3757  "load_from_swift_error should not be constant memory");
3758 
3759  SmallVector<EVT, 4> ValueVTs;
3760  SmallVector<uint64_t, 4> Offsets;
3761  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
3762  ValueVTs, &Offsets);
3763  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3764  "expect a single EVT for swifterror");
3765 
3766  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
3767  SDValue L = DAG.getCopyFromReg(
3768  getRoot(), getCurSDLoc(),
3769  FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
3770  ValueVTs[0]);
3771 
3772  setValue(&I, L);
3773 }
3774 
3775 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3776  if (I.isAtomic())
3777  return visitAtomicStore(I);
3778 
3779  const Value *SrcV = I.getOperand(0);
3780  const Value *PtrV = I.getOperand(1);
3781 
3782  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3783  if (TLI.supportSwiftError()) {
3784  // Swifterror values can come from either a function parameter with
3785  // swifterror attribute or an alloca with swifterror attribute.
3786  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
3787  if (Arg->hasSwiftErrorAttr())
3788  return visitStoreToSwiftError(I);
3789  }
3790 
3791  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
3792  if (Alloca->isSwiftError())
3793  return visitStoreToSwiftError(I);
3794  }
3795  }
3796 
3797  SmallVector<EVT, 4> ValueVTs;
3798  SmallVector<uint64_t, 4> Offsets;
3799  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3800  SrcV->getType(), ValueVTs, &Offsets);
3801  unsigned NumValues = ValueVTs.size();
3802  if (NumValues == 0)
3803  return;
3804 
3805  // Get the lowered operands. Note that we do this after
3806  // checking if NumValues is zero, because with zero values
3807  // the operands won't have values in the map.
3808  SDValue Src = getValue(SrcV);
3809  SDValue Ptr = getValue(PtrV);
3810 
3811  SDValue Root = getRoot();
3812  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3813  SDLoc dl = getCurSDLoc();
3814  EVT PtrVT = Ptr.getValueType();
3815  unsigned Alignment = I.getAlignment();
3816  AAMDNodes AAInfo;
3817  I.getAAMetadata(AAInfo);
3818 
3819  auto MMOFlags = MachineMemOperand::MONone;
3820  if (I.isVolatile())
3821  MMOFlags |= MachineMemOperand::MOVolatile;
3822  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
3823  MMOFlags |= MachineMemOperand::MONonTemporal;
3824  MMOFlags |= TLI.getMMOFlags(I);
3825 
3826  // An aggregate store cannot wrap around the address space, so offsets to its
3827  // parts don't wrap either.
3828  SDNodeFlags Flags;
3829  Flags.setNoUnsignedWrap(true);
3830 
3831  unsigned ChainI = 0;
3832  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3833  // See visitLoad comments.
3834  if (ChainI == MaxParallelChains) {
3835  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3836  makeArrayRef(Chains.data(), ChainI));
3837  Root = Chain;
3838  ChainI = 0;
3839  }
3840  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3841  DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
3842  SDValue St = DAG.getStore(
3843  Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
3844  MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
3845  Chains[ChainI] = St;
3846  }
3847 
3848  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3849  makeArrayRef(Chains.data(), ChainI));
3850  DAG.setRoot(StoreNode);
3851 }
3852 
3853 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
3854  bool IsCompressing) {
3855  SDLoc sdl = getCurSDLoc();
3856 
3857  auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3858  unsigned& Alignment) {
3859  // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3860  Src0 = I.getArgOperand(0);
3861  Ptr = I.getArgOperand(1);
3862  Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3863  Mask = I.getArgOperand(3);
3864  };
3865  auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3866  unsigned& Alignment) {
3867  // llvm.masked.compressstore.*(Src0, Ptr, Mask)
3868  Src0 = I.getArgOperand(0);
3869  Ptr = I.getArgOperand(1);
3870  Mask = I.getArgOperand(2);
3871  Alignment = 0;
3872  };
3873 
3874  Value *PtrOperand, *MaskOperand, *Src0Operand;
3875  unsigned Alignment;
3876  if (IsCompressing)
3877  getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3878  else
3879  getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3880 
3881  SDValue Ptr = getValue(PtrOperand);
3882  SDValue Src0 = getValue(Src0Operand);
3883  SDValue Mask = getValue(MaskOperand);
3884 
3885  EVT VT = Src0.getValueType();
3886  if (!Alignment)
3887  Alignment = DAG.getEVTAlignment(VT);
3888 
3889  AAMDNodes AAInfo;
3890  I.getAAMetadata(AAInfo);
3891 
3892  MachineMemOperand *MMO =
3893  DAG.getMachineFunction().
3894  getMachineMemOperand(MachinePointerInfo(PtrOperand),
3895  MachineMemOperand::MOStore, VT.getStoreSize(),
3896  Alignment, AAInfo);
3897  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3898  MMO, false /* Truncating */,
3899  IsCompressing);
3900  DAG.setRoot(StoreNode);
3901  setValue(&I, StoreNode);
3902 }
3903 
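// Illustrative example (editorial, names hypothetical): for a call such as
//   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %p,
//                                              i32 4, <4 x i1> %mask)
// getMaskedStoreOps binds Src0 = %val, Ptr = %p, Alignment = 4 and
// Mask = %mask before the masked-store node is built.
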
3904 // Get a uniform base for the Gather/Scatter intrinsic.
3905 // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3906 // We try to represent it as a base pointer + vector of indices.
3907 // Usually, the vector of pointers comes from a 'getelementptr' instruction.
3908 // The first operand of the GEP may be a single pointer or a vector of pointers.
3909 // Example:
3910 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3911 // or
3912 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
3913 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3914 //
3915 // When the first GEP operand is a single pointer, it is the uniform base we
3916 // are looking for. If the first GEP operand is a splat vector, we extract
3917 // the splat value and use it as the uniform base.
3918 // In all other cases the function returns 'false'.
3919 static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
3920  SDValue &Scale, SelectionDAGBuilder* SDB) {
3921  SelectionDAG& DAG = SDB->DAG;
3922  LLVMContext &Context = *DAG.getContext();
3923 
3924  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
3925  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3926  if (!GEP)
3927  return false;
3928 
3929  const Value *GEPPtr = GEP->getPointerOperand();
3930  if (!GEPPtr->getType()->isVectorTy())
3931  Ptr = GEPPtr;
3932  else if (!(Ptr = getSplatValue(GEPPtr)))
3933  return false;
3934 
3935  unsigned FinalIndex = GEP->getNumOperands() - 1;
3936  Value *IndexVal = GEP->getOperand(FinalIndex);
3937 
3938  // Ensure all the other indices are 0.
3939  for (unsigned i = 1; i < FinalIndex; ++i) {
3940  auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
3941  if (!C || !C->isZero())
3942  return false;
3943  }
3944 
3945  // The operands of the GEP may be defined in another basic block.
3946  // In this case we'll not find nodes for the operands.
3947  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
3948  return false;
3949 
3950  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3951  const DataLayout &DL = DAG.getDataLayout();
3952  Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
3953  SDB->getCurSDLoc(), TLI.getPointerTy(DL));
3954  Base = SDB->getValue(Ptr);
3955  Index = SDB->getValue(IndexVal);
3956 
3957  if (!Index.getValueType().isVector()) {
3958  unsigned GEPWidth = GEP->getType()->getVectorNumElements();
3959  EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
3960  Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
3961  }
3962  return true;
3963 }
3964 
3965 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
3966  SDLoc sdl = getCurSDLoc();
3967 
3968  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
3969  const Value *Ptr = I.getArgOperand(1);
3970  SDValue Src0 = getValue(I.getArgOperand(0));
3971  SDValue Mask = getValue(I.getArgOperand(3));
3972  EVT VT = Src0.getValueType();
3973  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
3974  if (!Alignment)
3975  Alignment = DAG.getEVTAlignment(VT);
3976  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3977 
3978  AAMDNodes AAInfo;
3979  I.getAAMetadata(AAInfo);
3980 
3981  SDValue Base;
3982  SDValue Index;
3983  SDValue Scale;
3984  const Value *BasePtr = Ptr;
3985  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
3986 
3987  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
3988  MachineMemOperand *MMO = DAG.getMachineFunction().
3989  getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
3990  MachineMemOperand::MOStore, VT.getStoreSize(),
3991  Alignment, AAInfo);
3992  if (!UniformBase) {
3993  Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3994  Index = getValue(Ptr);
3995  Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3996  }
3997  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
3998  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
3999  Ops, MMO);
4000  DAG.setRoot(Scatter);
4001  setValue(&I, Scatter);
4002 }
4003 
4004 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
4005  SDLoc sdl = getCurSDLoc();
4006 
4007  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
4008  unsigned& Alignment) {
4009  // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
4010  Ptr = I.getArgOperand(0);
4011  Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
4012  Mask = I.getArgOperand(2);
4013  Src0 = I.getArgOperand(3);
4014  };
4015  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
4016  unsigned& Alignment) {
4017  // @llvm.masked.expandload.*(Ptr, Mask, Src0)
4018  Ptr = I.getArgOperand(0);
4019  Alignment = 0;
4020  Mask = I.getArgOperand(1);
4021  Src0 = I.getArgOperand(2);
4022  };
4023 
4024  Value *PtrOperand, *MaskOperand, *Src0Operand;
4025  unsigned Alignment;
4026  if (IsExpanding)
4027  getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4028  else
4029  getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4030 
4031  SDValue Ptr = getValue(PtrOperand);
4032  SDValue Src0 = getValue(Src0Operand);
4033  SDValue Mask = getValue(MaskOperand);
4034 
4035  EVT VT = Src0.getValueType();
4036  if (!Alignment)
4037  Alignment = DAG.getEVTAlignment(VT);
4038 
4039  AAMDNodes AAInfo;
4040  I.getAAMetadata(AAInfo);
4041  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4042 
4043  // Do not serialize masked loads of constant memory with anything.
4044  bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
4045  PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
4046  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
4047 
4048  MachineMemOperand *MMO =
4049  DAG.getMachineFunction().
4050  getMachineMemOperand(MachinePointerInfo(PtrOperand),
4051  MachineMemOperand::MOLoad, VT.getStoreSize(),
4052  Alignment, AAInfo, Ranges);
4053 
4054  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
4055  ISD::NON_EXTLOAD, IsExpanding);
4056  if (AddToChain) {
4057  SDValue OutChain = Load.getValue(1);
4058  DAG.setRoot(OutChain);
4059  }
4060  setValue(&I, Load);
4061 }
4062 
4063 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4064  SDLoc sdl = getCurSDLoc();
4065 
4066  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4067  const Value *Ptr = I.getArgOperand(0);
4068  SDValue Src0 = getValue(I.getArgOperand(3));
4069  SDValue Mask = getValue(I.getArgOperand(2));
4070 
4071  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4072  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4073  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
4074  if (!Alignment)
4075  Alignment = DAG.getEVTAlignment(VT);
4076 
4077  AAMDNodes AAInfo;
4078  I.getAAMetadata(AAInfo);
4079  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4080 
4081  SDValue Root = DAG.getRoot();
4082  SDValue Base;
4083  SDValue Index;
4084  SDValue Scale;
4085  const Value *BasePtr = Ptr;
4086  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
4087  bool ConstantMemory = false;
4088  if (UniformBase &&
4089  AA && AA->pointsToConstantMemory(MemoryLocation(
4090  BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
4091  AAInfo))) {
4092  // Do not serialize (non-volatile) loads of constant memory with anything.
4093  Root = DAG.getEntryNode();
4094  ConstantMemory = true;
4095  }
4096 
4097  MachineMemOperand *MMO =
4098  DAG.getMachineFunction().
4099  getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
4100  MachineMemOperand::MOLoad, VT.getStoreSize(),
4101  Alignment, AAInfo, Ranges);
4102 
4103  if (!UniformBase) {
4104  Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4105  Index = getValue(Ptr);
4106  Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4107  }
4108  SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
4109  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4110  Ops, MMO);
4111 
4112  SDValue OutChain = Gather.getValue(1);
4113  if (!ConstantMemory)
4114  PendingLoads.push_back(OutChain);
4115  setValue(&I, Gather);
4116 }
4117 
4118 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4119  SDLoc dl = getCurSDLoc();
4120  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
4121  AtomicOrdering FailureOrder = I.getFailureOrdering();
4122  SyncScope::ID SSID = I.getSyncScopeID();
4123 
4124  SDValue InChain = getRoot();
4125 
4126  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
4127  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4128  SDValue L = DAG.getAtomicCmpSwap(
4129  ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
4130  getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
4131  getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
4132  /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
4133 
4134  SDValue OutChain = L.getValue(2);
4135 
4136  setValue(&I, L);
4137  DAG.setRoot(OutChain);
4138 }
4139 
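// Illustrative example (editorial): `cmpxchg i32* %p, i32 %old, i32 %new
// seq_cst seq_cst` becomes one ATOMIC_CMP_SWAP_WITH_SUCCESS node: result 0 is
// the loaded i32, result 1 the i1 success flag, and result 2 the out-chain.
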
4140 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4141  SDLoc dl = getCurSDLoc();
4142  ISD::NodeType NT;
4143  switch (I.getOperation()) {
4144  default: llvm_unreachable("Unknown atomicrmw operation");
4145  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4146  case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4147  case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4148  case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4149  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4150  case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4151  case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4152  case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4153  case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4154  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4155  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4156  }
4157  AtomicOrdering Order = I.getOrdering();
4158  SyncScope::ID SSID = I.getSyncScopeID();
4159 
4160  SDValue InChain = getRoot();
4161 
4162  SDValue L =
4163  DAG.getAtomic(NT, dl,
4164  getValue(I.getValOperand()).getSimpleValueType(),
4165  InChain,
4166  getValue(I.getPointerOperand()),
4167  getValue(I.getValOperand()),
4168  I.getPointerOperand(),
4169  /* Alignment=*/ 0, Order, SSID);
4170 
4171  SDValue OutChain = L.getValue(1);
4172 
4173  setValue(&I, L);
4174  DAG.setRoot(OutChain);
4175 }
4176 
4177 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4178  SDLoc dl = getCurSDLoc();
4179  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4180  SDValue Ops[3];
4181  Ops[0] = getRoot();
4182  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4183  TLI.getFenceOperandTy(DAG.getDataLayout()));
4184  Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4185  TLI.getFenceOperandTy(DAG.getDataLayout()));
4186  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4187 }
4188 
4189 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4190  SDLoc dl = getCurSDLoc();
4191  AtomicOrdering Order = I.getOrdering();
4192  SyncScope::ID SSID = I.getSyncScopeID();
4193 
4194  SDValue InChain = getRoot();
4195 
4196  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4197  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4198 
4199  if (!TLI.supportsUnalignedAtomics() &&
4200  I.getAlignment() < VT.getStoreSize())
4201  report_fatal_error("Cannot generate unaligned atomic load");
4202 
4203  MachineMemOperand *MMO =
4204  DAG.getMachineFunction().
4205  getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
4206  MachineMemOperand::MOVolatile |
4207  MachineMemOperand::MOLoad,
4208  VT.getStoreSize(),
4209  I.getAlignment() ? I.getAlignment() :
4210  DAG.getEVTAlignment(VT),
4211  AAMDNodes(), nullptr, SSID, Order);
4212 
4213  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4214  SDValue L =
4215  DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
4216  getValue(I.getPointerOperand()), MMO);
4217 
4218  SDValue OutChain = L.getValue(1);
4219 
4220  setValue(&I, L);
4221  DAG.setRoot(OutChain);
4222 }
4223 
4224 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4225  SDLoc dl = getCurSDLoc();
4226 
4227  AtomicOrdering Order = I.getOrdering();
4228  SyncScope::ID SSID = I.getSyncScopeID();
4229 
4230  SDValue InChain = getRoot();
4231 
4232  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4233  EVT VT =
4234  TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
4235 
4236  if (I.getAlignment() < VT.getStoreSize())
4237  report_fatal_error("Cannot generate unaligned atomic store");
4238 
4239  SDValue OutChain =
4240  DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
4241  InChain,
4242  getValue(I.getPointerOperand()),
4243  getValue(I.getValueOperand()),
4244  I.getPointerOperand(), I.getAlignment(),
4245  Order, SSID);
4246 
4247  DAG.setRoot(OutChain);
4248 }
4249 
4250 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4251 /// node.
4252 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4253  unsigned Intrinsic) {
4254  // Ignore the callsite's attributes. A specific call site may be marked with
4255  // readnone, but the lowering code will expect the chain based on the
4256  // definition.
4257  const Function *F = I.getCalledFunction();
4258  bool HasChain = !F->doesNotAccessMemory();
4259  bool OnlyLoad = HasChain && F->onlyReadsMemory();
4260 
4261  // Build the operand list.
4262  SmallVector<SDValue, 8> Ops;
4263  if (HasChain) { // If this intrinsic has side-effects, chainify it.
4264  if (OnlyLoad) {
4265  // We don't need to serialize loads against other loads.
4266  Ops.push_back(DAG.getRoot());
4267  } else {
4268  Ops.push_back(getRoot());
4269  }
4270  }
4271 
4272  // Info is set by getTgtMemIntrinsic.
4273  TargetLowering::IntrinsicInfo Info;
4274  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4275  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
4276  DAG.getMachineFunction(),
4277  Intrinsic);
4278 
4279  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
4280  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
4281  Info.opc == ISD::INTRINSIC_W_CHAIN)
4282  Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4283  TLI.getPointerTy(DAG.getDataLayout())));
4284 
4285  // Add all operands of the call to the operand list.
4286  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
4287  SDValue Op = getValue(I.getArgOperand(i));
4288  Ops.push_back(Op);
4289  }
4290 
4291  SmallVector<EVT, 4> ValueVTs;
4292  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4293 
4294  if (HasChain)
4295  ValueVTs.push_back(MVT::Other);
4296 
4297  SDVTList VTs = DAG.getVTList(ValueVTs);
4298 
4299  // Create the node.
4300  SDValue Result;
4301  if (IsTgtIntrinsic) {
4302  // This is a target intrinsic that touches memory.
4303  Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
4304  Ops, Info.memVT,
4305  MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
4306  Info.flags, Info.size);
4307  } else if (!HasChain) {
4308  Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4309  } else if (!I.getType()->isVoidTy()) {
4310  Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4311  } else {
4312  Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4313  }
4314 
4315  if (HasChain) {
4316  SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4317  if (OnlyLoad)
4318  PendingLoads.push_back(Chain);
4319  else
4320  DAG.setRoot(Chain);
4321  }
4322 
4323  if (!I.getType()->isVoidTy()) {
4324  if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
4325  EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
4326  Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
4327  } else
4328  Result = lowerRangeToAssertZExt(DAG, I, Result);
4329 
4330  setValue(&I, Result);
4331  }
4332 }
4333 
4334 /// GetSignificand - Get the significand and build it into a floating-point
4335 /// number with exponent of 1:
4336 ///
4337 /// Op = (Op & 0x007fffff) | 0x3f800000;
4338 ///
4339 /// where Op is the i32 bit pattern of the floating-point value.
4340 static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
4341  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4342  DAG.getConstant(0x007fffff, dl, MVT::i32));
4343  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4344  DAG.getConstant(0x3f800000, dl, MVT::i32));
4345  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4346 }
4347 
4348 /// GetExponent - Get the exponent:
4349 ///
4350 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4351 ///
4352 /// where Op is the i32 bit pattern of the floating-point value.
4353 static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
4354  const TargetLowering &TLI, const SDLoc &dl) {
4355  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4356  DAG.getConstant(0x7f800000, dl, MVT::i32));
4357  SDValue t1 = DAG.getNode(
4358  ISD::SRL, dl, MVT::i32, t0,
4359  DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
4360  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4361  DAG.getConstant(127, dl, MVT::i32));
4362  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
4363 }
4364 
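// Worked example (editorial, values illustrative): for Op = 0x40490fdb, the
// bits of 3.14159f, GetSignificand returns (0x40490fdb & 0x007fffff) |
// 0x3f800000 = 0x3fc90fdb (~1.5707964f) and GetExponent returns
// ((0x40490fdb & 0x7f800000) >> 23) - 127 = 1; indeed 3.14159 == 1.5707964 * 2^1.
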
4365 /// getF32Constant - Get 32-bit floating point constant.
4366 static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4367  const SDLoc &dl) {
4368  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4369  MVT::f32);
4370 }
4371 
4372 static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
4373  SelectionDAG &DAG) {
4374  // TODO: What fast-math-flags should be set on the floating-point nodes?
4375 
4376  // IntegerPartOfX = (int32_t)t0;
4377  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4378 
4379  // FractionalPartOfX = t0 - (float)IntegerPartOfX;
4380  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4381  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4382 
4383  // IntegerPartOfX <<= 23;
4384  IntegerPartOfX = DAG.getNode(
4385  ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4387  DAG.getDataLayout())));
4388 
4389  SDValue TwoToFractionalPartOfX;
4390  if (LimitFloatPrecision <= 6) {
4391  // For floating-point precision of 6:
4392  //
4393  // TwoToFractionalPartOfX =
4394  // 0.997535578f +
4395  // (0.735607626f + 0.252464424f * x) * x;
4396  //
4397  // error 0.0144103317, which is 6 bits
4398  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4399  getF32Constant(DAG, 0x3e814304, dl));
4400  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4401  getF32Constant(DAG, 0x3f3c50c8, dl));
4402  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4403  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4404  getF32Constant(DAG, 0x3f7f5e7e, dl));
4405  } else if (LimitFloatPrecision <= 12) {
4406  // For floating-point precision of 12:
4407  //
4408  // TwoToFractionalPartOfX =
4409  // 0.999892986f +
4410  // (0.696457318f +
4411  // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4412  //
4413  // error 0.000107046256, which is 13 to 14 bits
4414  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4415  getF32Constant(DAG, 0x3da235e3, dl));
4416  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4417  getF32Constant(DAG, 0x3e65b8f3, dl));
4418  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4419  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4420  getF32Constant(DAG, 0x3f324b07, dl));
4421  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4422  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4423  getF32Constant(DAG, 0x3f7ff8fd, dl));
4424  } else { // LimitFloatPrecision <= 18
4425  // For floating-point precision of 18:
4426  //
4427  // TwoToFractionalPartOfX =
4428  // 0.999999982f +
4429  // (0.693148872f +
4430  // (0.240227044f +
4431  // (0.554906021e-1f +
4432  // (0.961591928e-2f +
4433  // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4434  // error 2.47208000*10^(-7), which is better than 18 bits
4435  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4436  getF32Constant(DAG, 0x3924b03e, dl));
4437  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4438  getF32Constant(DAG, 0x3ab24b87, dl));
4439  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4440  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4441  getF32Constant(DAG, 0x3c1d8c17, dl));
4442  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4443  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4444  getF32Constant(DAG, 0x3d634a1d, dl));
4445  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4446  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4447  getF32Constant(DAG, 0x3e75fe14, dl));
4448  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4449  SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4450  getF32Constant(DAG, 0x3f317234, dl));
4451  SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4452  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4453  getF32Constant(DAG, 0x3f800000, dl));
4454  }
4455 
4456  // Add the exponent into the result in integer domain.
4457  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4458  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4459  DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
4460 }
4461 
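// Worked example (editorial): for t0 = 3.5f, IntegerPartOfX = 3 and X = 0.5;
// the polynomial approximates 2^0.5 ~ 1.4142, and adding 3 << 23 to its bit
// pattern multiplies it by 2^3, giving ~11.3137 ~ 2^3.5.
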
4462 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
4463 /// limited-precision mode.
4464 static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4465  const TargetLowering &TLI) {
4466  if (Op.getValueType() == MVT::f32 &&
4467  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4468 
4469  // Put the exponent in the right bit position for later addition to the
4470  // final result:
4471  //
4472  // #define LOG2OFe 1.4426950f
4473  // t0 = Op * LOG2OFe
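 // For example, exp(1.0f) is evaluated as 2^(1.0 * 1.4426950) ~ 2.71828,
 // using the identity e^x == 2^(x * log2(e)).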
4474 
4475  // TODO: What fast-math-flags should be set here?
4476  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4477  getF32Constant(DAG, 0x3fb8aa3b, dl));
4478  return getLimitedPrecisionExp2(t0, dl, DAG);
4479  }
4480 
4481  // No special expansion.
4482  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4483 }
4484 
4485 /// expandLog - Lower a log intrinsic. Handles the special sequences for
4486 /// limited-precision mode.
4487 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4488  const TargetLowering &TLI) {
4489  // TODO: What fast-math-flags should be set on the floating-point nodes?
4490 
4491  if (Op.getValueType() == MVT::f32 &&
4492  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4493  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4494 
4495  // Scale the exponent by log(2) [0.69314718f].
4496  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4497  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4498  getF32Constant(DAG, 0x3f317218, dl));
4499 
4500  // Get the significand and build it into a floating-point number with
4501  // exponent of 1.
4502  SDValue X = GetSignificand(DAG, Op1, dl);
4503 
4504  SDValue LogOfMantissa;
4505  if (LimitFloatPrecision <= 6) {
4506  // For floating-point precision of 6:
4507  //
4508  // LogOfMantissa =
4509  // -1.1609546f +
4510  // (1.4034025f - 0.23903021f * x) * x;
4511  //
4512  // error 0.0034276066, which is better than 8 bits
4513  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4514  getF32Constant(DAG, 0xbe74c456, dl));
4515  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4516  getF32Constant(DAG, 0x3fb3a2b1, dl));
4517  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4518  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4519  getF32Constant(DAG, 0x3f949a29, dl));
4520  } else if (LimitFloatPrecision <= 12) {
4521  // For floating-point precision of 12:
4522  //
4523  // LogOfMantissa =
4524  // -1.7417939f +
4525  // (2.8212026f +
4526  // (-1.4699568f +
4527  // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4528  //
4529  // error 0.000061011436, which is 14 bits
4530  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4531  getF32Constant(DAG, 0xbd67b6d6, dl));
4532  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4533  getF32Constant(DAG, 0x3ee4f4b8, dl));
4534  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4535  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4536  getF32Constant(DAG, 0x3fbc278b, dl));
4537  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4538  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4539  getF32Constant(DAG, 0x40348e95, dl));
4540  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4541  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4542  getF32Constant(DAG, 0x3fdef31a, dl));
4543  } else { // LimitFloatPrecision <= 18
4544  // For floating-point precision of 18:
4545  //
4546  // LogOfMantissa =
4547  // -2.1072184f +
4548  // (4.2372794f +
4549  // (-3.7029485f +
4550  // (2.2781945f +
4551  // (-0.87823314f +
4552  // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x) * x;
4553  //
4554  // error 0.0000023660568, which is better than 18 bits
4555  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4556  getF32Constant(DAG, 0xbc91e5ac, dl));
4557  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4558  getF32Constant(DAG, 0x3e4350aa, dl));
4559  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4560  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4561  getF32Constant(DAG, 0x3f60d3e3, dl));
4562  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4563  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4564  getF32Constant(DAG, 0x4011cdf0, dl));
4565  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4566  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4567  getF32Constant(DAG, 0x406cfd1c, dl));
4568  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4569  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4570  getF32Constant(DAG, 0x408797cb, dl));
4571  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4572  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4573  getF32Constant(DAG, 0x4006dcab, dl));
4574  }
4575 
4576  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4577  }
4578 
4579  // No special expansion.
4580  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
4581 }
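
// A standalone sketch of the same decomposition in plain C++: the DAG code
// peels the exponent and significand apart with bit operations (GetExponent
// / GetSignificand); std::frexp stands in for that here, and the 6-bit
// polynomial above is evaluated in Horner form, exactly as the
// FMUL/FADD/FSUB chain does (hypothetical helper; assumes a positive,
// finite, normal input):
#include <cmath>

static float logLimited6(float X) {
  int Exp;
  float M = std::frexp(X, &Exp) * 2.0f; // M in [1, 2), X == M * 2^(Exp - 1)
  float LogOfExponent = (Exp - 1) * 0.69314718f; // exponent scaled by ln(2)
  float LogOfMantissa = ((-0.23903021f * M + 1.4034025f) * M) - 1.1609546f;
  return LogOfExponent + LogOfMantissa;
}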
4582 
4583 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4584 /// limited-precision mode.
4585 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4586  const TargetLowering &TLI) {
4587  // TODO: What fast-math-flags should be set on the floating-point nodes?
4588 
4589  if (Op.getValueType() == MVT::f32 &&
4590  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4591  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4592 
4593  // Get the exponent.
4594  SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
4595 
4596  // Get the significand and build it into a floating-point number with
4597  // exponent of 1.
4598  SDValue X = GetSignificand(DAG, Op1, dl);
4599 
4600  // Different possible minimax approximations of the significand in
4601  // floating point, for various degrees of accuracy over [1,2].
4602  SDValue Log2ofMantissa;
4603  if (LimitFloatPrecision <= 6) {
4604  // For floating-point precision of 6:
4605  //
4606  // Log2ofMantissa = -1.6749035f + (2.0246817f - 0.34484768f * x) * x;
4607  //
4608  // error 0.0049451742, which is more than 7 bits
4609  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4610  getF32Constant(DAG, 0xbeb08fe0, dl));
4611  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4612  getF32Constant(DAG, 0x40019463, dl));
4613  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4614  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4615  getF32Constant(DAG, 0x3fd6633d, dl));
4616  } else if (LimitFloatPrecision <= 12) {
4617  // For floating-point precision of 12:
4618  //
4619  // Log2ofMantissa =
4620  // -2.51285454f +
4621  // (4.07009056f +
4622  // (-2.12067489f +
4623  // (0.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4624  //
4625  // error 0.0000876136000, which is better than 13 bits
4626  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4627  getF32Constant(DAG, 0xbda7262e, dl));
4628  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4629  getF32Constant(DAG, 0x3f25280b, dl));
4630  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4631  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4632  getF32Constant(DAG, 0x4007b923, dl));
4633  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4634  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4635  getF32Constant(DAG, 0x40823e2f, dl));
4636  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4637  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4638  getF32Constant(DAG, 0x4020d29c, dl));
4639  } else { // LimitFloatPrecision <= 18
4640  // For floating-point precision of 18:
4641  //
4642  // Log2ofMantissa =
4643  // -3.0400495f +
4644  // (6.1129976f +
4645  // (-5.3420409f +
4646  // (3.2865683f +
4647  // (-1.2669343f +
4648  // (0.27515199f -
4649  // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4650  //
4651  // error 0.0000018516, which is better than 18 bits
4652  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4653  getF32Constant(DAG, 0xbcd2769e, dl));
4654  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4655  getF32Constant(DAG, 0x3e8ce0b9, dl));
4656  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4657  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4658  getF32Constant(DAG, 0x3fa22ae7, dl));
4659  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4660  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4661  getF32Constant(DAG, 0x40525723, dl));
4662  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4663  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4664  getF32Constant(DAG, 0x40aaf200, dl));
4665  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4666  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4667  getF32Constant(DAG, 0x40c39dad, dl));
4668  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4669  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4670  getF32Constant(DAG, 0x4042902c, dl));
4671  }
4672 
4673  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4674  }
4675 
4676  // No special expansion.
4677  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
4678 }
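
// The quoted error bounds can be sanity-checked numerically. A rough
// harness for the 6-bit log2 polynomial over [1, 2) (assumes host <cmath>;
// dense sampling is a stand-in for a true minimax error computation):
#include <cmath>

static float maxLog2Poly6Error() {
  float MaxErr = 0.0f;
  for (float M = 1.0f; M < 2.0f; M += 1.0f / 65536.0f) {
    float P = ((-0.34484768f * M + 2.0246817f) * M) - 1.6749035f;
    MaxErr = std::fmax(MaxErr, std::fabs(P - std::log2(M)));
  }
  return MaxErr; // expected to land near the quoted 0.0049451742
}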
4679 
4680 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4681 /// limited-precision mode.
4682 static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4683  const TargetLowering &TLI) {
4684  // TODO: What fast-math-flags should be set on the floating-point nodes?
4685 
4686  if (Op.getValueType() == MVT::f32 &&
4687  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4688  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4689 
4690  // Scale the exponent by log10(2) [0.30102999f].
4691  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4692  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4693  getF32Constant(DAG, 0x3e9a209a, dl));
4694 
4695  // Get the significand and build it into a floating-point number with
4696  // exponent of 1.
4697  SDValue X = GetSignificand(DAG, Op1, dl);
4698 
4699  SDValue Log10ofMantissa;
4700  if (LimitFloatPrecision <= 6) {
4701  // For floating-point precision of 6:
4702  //
4703  // Log10ofMantissa =
4704  // -0.50419619f +
4705  // (0.60948995f - 0.10380950f * x) * x;
4706  //
4707  // error 0.0014886165, which is 6 bits
4708  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4709  getF32Constant(DAG, 0xbdd49a13, dl));
4710  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4711  getF32Constant(DAG, 0x3f1c0789, dl));
4712  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4713  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4714  getF32Constant(DAG, 0x3f011300, dl));
4715  } else if (LimitFloatPrecision <= 12) {
4716  // For floating-point precision of 12:
4717  //
4718  // Log10ofMantissa =
4719  // -0.64831180f +
4720  // (0.91751397f +
4721  // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4722  //
4723  // error 0.00019228036, which is better than 12 bits
4724  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4725  getF32Constant(DAG, 0x3d431f31, dl));
4726  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4727  getF32Constant(DAG, 0x3ea21fb2, dl));
4728  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4729  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4730  getF32Constant(DAG, 0x3f6ae232, dl));
4731  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4732  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4733  getF32Constant(DAG, 0x3f25f7c3, dl));
4734  } else { // LimitFloatPrecision <= 18
4735  // For floating-point precision of 18:
4736  //
4737  // Log10ofMantissa =
4738  // -0.84299375f +
4739  // (1.5327582f +
4740  // (-1.0688956f +
4741  // (0.49102474f +
4742  // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4743  //
4744  // error 0.0000037995730, which is better than 18 bits
4745  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4746  getF32Constant(DAG, 0x3c5d51ce, dl));
4747  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4748  getF32Constant(DAG, 0x3e00685a, dl));
4749  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4750  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4751  getF32Constant(DAG, 0x3efb6798, dl));
4752  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4753  SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4754  getF32Constant(DAG, 0x3f88d192, dl));
4755  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4756  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4757  getF32Constant(DAG, 0x3fc4316c, dl));
4758  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4759  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4760  getF32Constant(DAG, 0x3f57ce70, dl));
4761  }
4762 
4763  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4764  }
4765 
4766  // No special expansion.
4767  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
4768 }
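
// Same decomposition as expandLog/expandLog2, rescaled into base 10:
// log10(x) == Exp * log10(2) + log10(m). A minimal check that 0x3e9a209a is
// (to within an ulp) the f32 bit pattern of log10(2) ~= 0.30102999f
// (assumes host <cmath>):
#include <cmath>
#include <cstdint>
#include <cstring>

static bool checkLog10Scale() {
  const uint32_t Bits = 0x3e9a209a;
  float C;
  std::memcpy(&C, &Bits, sizeof(C));
  return std::fabs(C - std::log10(2.0f)) < 1e-7f;
}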
4769 
4770 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4771 /// limited-precision mode.
4772 static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4773  const TargetLowering &TLI) {
4774  if (Op.getValueType() == MVT::f32 &&
4775  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
4776  return getLimitedPrecisionExp2(Op, dl, DAG);
4777 
4778  // No special expansion.
4779  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4780 }
4781 
4782 /// expandPow - Lower a pow intrinsic. Handles the special sequences for
4783 /// limited-precision mode when the base is exactly 10.0f.
4784 static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
4785  SelectionDAG &DAG, const TargetLowering &TLI) {
4786  bool IsExp10 = false;
4787  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
4788  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4789  if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
4790  APFloat Ten(10.0f);
4791  IsExp10 = LHSC->isExactlyValue(Ten);
4792  }
4793  }
4794 
4795  // TODO: What fast-math-flags should be set on the FMUL node?
4796  if (IsExp10) {
4797  // Put the exponent in the right bit position for later addition to the
4798  // final result:
4799  //
4800  // #define LOG2OF10 3.3219281f
4801  // t0 = Op * LOG2OF10;
4802  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4803  getF32Constant(DAG, 0x40549a78, dl));
4804  return getLimitedPrecisionExp2(t0, dl, DAG);
4805  }
4806 
4807  // No special expansion.
4808  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4809 }
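
// The IsExp10 fast path reuses the exp2 expansion via 10^x == 2^(x *
// log2(10)); 0x40549a78 is the f32 bit pattern of LOG2OF10 ~= 3.3219281f.
// A rough standalone check (hypothetical helper; assumes host <cmath> and a
// loose tolerance for moderate x):
#include <cmath>
#include <cstdint>
#include <cstring>

static bool checkPow10Reduction(float X) {
  const uint32_t Bits = 0x40549a78;
  float Log2Of10;
  std::memcpy(&Log2Of10, &Bits, sizeof(Log2Of10)); // ~3.3219281f
  float Ref = std::pow(10.0f, X);
  return std::fabs(std::exp2(X * Log2Of10) - Ref) <= 1e-3f * std::fabs(Ref);
}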
4810 
4811 /// ExpandPowI - Expand a llvm.powi intrinsic.
4812 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
4813  SelectionDAG &DAG) {
4814  // If RHS is a constant, we can expand this out to a multiplication tree;
4815  // otherwise we end up lowering to a call to __powidf2 (for example). When
4816  // optimizing for size, we only do the expansion if it would produce a
4817  // small number of multiplies; otherwise we fall back to the libcall.
4818  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4819  // Get the exponent as a positive value.
4820  unsigned Val = RHSC->getSExtValue();
4821  if ((int)Val < 0) Val = -Val;
4822 
4823  // powi(x, 0) -> 1.0
4824  if (Val == 0)
4825  return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4826 
4827  const Function &F = DAG.getMachineFunction().getFunction();
4828  if (!F.optForSize() ||
4829  // If optimizing for size, don't insert too many multiplies.
4830  // This inserts up to 5 multiplies.
4831  countPopulation(Val) + Log2_32(Val) < 7) {
4832  // We use the simple binary decomposition method to generate the multiply
4833  // sequence. There are more optimal ways to do this (for example,
4834  // powi(x,15) generates one more multiply than it should), but this has
4835  // the benefit of being both really simple and much better than a libcall.
4836  SDValue Res; // Logically starts equal to 1.0
4837  SDValue CurSquare = LHS;
4838  // TODO: Intrinsics should have fast-math-flags that propagate to these
4839  // nodes.
4840  while (Val) {
4841  if (Val & 1) {
4842  if (Res.getNode())
4843  Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
4844  else
4845  Res = CurSquare; // 1.0*CurSquare.
4846  }
4847 
4848  CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4849  CurSquare, CurSquare);
4850  Val >>= 1;
4851  }
4852 
4853  // If the original exponent was negative, invert the result (x^-n == 1/x^n).
4854  if (RHSC->getSExtValue() < 0)
4855  Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4856  DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4857  return Res;
4858  }
4859  }
4860 
4861  // Otherwise, expand to a libcall.
4862  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4863 }
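
// The loop above is ordinary square-and-multiply (binary exponentiation):
// for n == 15 it emits six live FMULs (squarings x^2, x^4, x^8 plus three
// folds into Res), while an optimal addition chain needs only five -- the
// "one more multiply" the comment above mentions. A standalone sketch in
// plain C++ (hypothetical helper):
static double powiSketch(double X, int N) {
  unsigned Val = N < 0 ? 0u - static_cast<unsigned>(N)
                       : static_cast<unsigned>(N);
  double Res = 1.0;
  double CurSquare = X; // X^1, then X^2, X^4, ... after each squaring
  while (Val) {
    if (Val & 1)
      Res *= CurSquare; // fold this bit's power of X into the result
    CurSquare *= CurSquare;
    Val >>= 1;
  }
  return N < 0 ? 1.0 / Res : Res; // powi(x, -n) == 1 / powi(x, n)
}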
4864 
4865 // getUnderlyingArgReg - Find underlying register used for a truncated or
4866 // bitcasted argument.
4867 static unsigned getUnderlyingArgReg(const SDValue &N) {
4868  switch (N.getOpcode()) {
4869  case ISD::CopyFromReg:
4870  return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4871  case ISD::BITCAST:
4872  case ISD::AssertZext:
4873  case ISD::AssertSext:
4874  case ISD::TRUNCATE:
4875  return getUnderlyingArgReg(N.getOperand(0));
4876  default:
4877  return 0;
4878  }
4879 }
4880 
4881 /// If the DbgValueInst is a dbg_value of a function argument, create the
4882 /// corresponding DBG_VALUE machine instruction for it now. At the end of
4883 /// instruction selection, they will be inserted to the entry BB.
4884 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4885  const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4886  DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
4887  const Argument *Arg = dyn_cast<Argument>(V);
4888  if (!Arg)
4889  return false;
4890 
4891  MachineFunction &MF = DAG.getMachineFunction();
4892  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4893 
4894  bool IsIndirect = false;
4895  Optional<MachineOperand> Op;
4896  // Some arguments' frame index is recorded during argument lowering.
4897  int FI = FuncInfo.getArgumentFrameIndex(Arg);
4898  if (FI != std::numeric_limits<int>::max())
4899  Op = MachineOperand::CreateFI(FI);
4900 
4901  if (!Op && N.getNode()) {
4902  unsigned Reg = getUnderlyingArgReg(N);
4903  if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4904  MachineRegisterInfo &RegInfo = MF.getRegInfo();
4905  unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4906  if (PR)
4907  Reg = PR;
4908  }
4909  if (Reg) {
4910  Op = MachineOperand::CreateReg(Reg, false);
4911  IsIndirect = IsDbgDeclare;
4912  }
4913  }
4914 
4915  if (!Op && N.getNode())
4916  // Check if frame index is available.
4917  if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
4918  if (FrameIndexSDNode *FINode =
4919  dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
4920  Op = MachineOperand::CreateFI(FINode->getIndex());
4921 
4922  if (!Op) {
4923  // Check if ValueMap has reg number.
4924  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4925  if (VMI != FuncInfo.ValueMap.end()) {
4926  const auto &TLI = DAG.getTargetLoweringInfo();
4927  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
4928  V->getType(), isABIRegCopy(V));
4929  if (RFV.occupiesMultipleRegs()) {
4930  unsigned Offset = 0;
4931  for (auto RegAndSize : RFV.getRegsAndSizes()) {
4932  Op = MachineOperand::CreateReg(RegAndSize.first, false);
4933  auto FragmentExpr = DIExpression::createFragmentExpression(
4934  Expr, Offset, RegAndSize.second);
4935  if (!FragmentExpr)
4936  continue;
4937  FuncInfo.ArgDbgValues.push_back(
4938  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
4939  Op->getReg(), Variable, *FragmentExpr));
4940  Offset += RegAndSize.second;
4941  }
4942  return true;
4943  }
4944  Op = MachineOperand::CreateReg(VMI->second, false);
4945  IsIndirect = IsDbgDeclare;
4946  }
4947  }
4948 
4949  if (!Op)
4950  return false;
4951 
4952  assert(Variable->isValidLocationForIntrinsic(DL) &&
4953  "Expected inlined-at fields to agree");
4954  if (Op->isReg())
4955  FuncInfo.ArgDbgValues.push_back(
4956  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
4957  Op->getReg(), Variable, Expr));
4958  else
4959  FuncInfo.ArgDbgValues.push_back(
4960  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
4961  .add(*Op)
4962  .addImm(0)
4963  .addMetadata(Variable)
4964  .addMetadata(Expr));
4965 
4966  return true;
4967 }
4968 
4969 /// Return the appropriate SDDbgValue based on N.
4970 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
4971  DILocalVariable *Variable,
4972  DIExpression *Expr,
4973  const DebugLoc &dl,
4974  unsigned DbgSDNodeOrder) {
4975  if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
4976  // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
4977  // stack slot locations as such instead of as indirectly addressed
4978  // locations.
4979  return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
4980  DbgSDNodeOrder);
4981  }
4982  return DAG.