LLVM  7.0.0svn
HexagonISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the Hexagon target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "Hexagon.h"
15 #include "HexagonISelDAGToDAG.h"
16 #include "HexagonISelLowering.h"
18 #include "HexagonTargetMachine.h"
22 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/Support/Debug.h"
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 
29 static
31 EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
32  cl::desc("Rebalance address calculation trees to improve "
33  "instruction selection"));
34 
35 // Rebalance only if this allows e.g. combining a GA with an offset or
36 // factoring out a shift.
37 static
39 RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
40  cl::desc("Rebalance address tree only if this allows optimizations"));
41 
42 static
44 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
45  cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
46 
47 static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
48  cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
49 
50 //===----------------------------------------------------------------------===//
51 // Instruction Selector Implementation
52 //===----------------------------------------------------------------------===//
53 
54 #define GET_DAGISEL_BODY HexagonDAGToDAGISel
55 #include "HexagonGenDAGISel.inc"
56 
57 /// createHexagonISelDag - This pass converts a legalized DAG into a
58 /// Hexagon-specific DAG, ready for instruction scheduling.
59 ///
60 namespace llvm {
62  CodeGenOpt::Level OptLevel) {
63  return new HexagonDAGToDAGISel(TM, OptLevel);
64 }
65 }
66 
68  SDValue Chain = LD->getChain();
69  SDValue Base = LD->getBasePtr();
70  SDValue Offset = LD->getOffset();
71  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
72  EVT LoadedVT = LD->getMemoryVT();
73  unsigned Opcode = 0;
74 
75  // Check for zero extended loads. Treat any-extend loads as zero extended
76  // loads.
77  ISD::LoadExtType ExtType = LD->getExtensionType();
78  bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
79  bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
80 
81  assert(LoadedVT.isSimple());
82  switch (LoadedVT.getSimpleVT().SimpleTy) {
83  case MVT::i8:
84  if (IsZeroExt)
85  Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
86  else
87  Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
88  break;
89  case MVT::i16:
90  if (IsZeroExt)
91  Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
92  else
93  Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
94  break;
95  case MVT::i32:
96  case MVT::v2i16:
97  case MVT::v4i8:
98  Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
99  break;
100  case MVT::i64:
101  case MVT::v2i32:
102  case MVT::v4i16:
103  case MVT::v8i8:
104  Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
105  break;
106  case MVT::v64i8:
107  case MVT::v32i16:
108  case MVT::v16i32:
109  case MVT::v8i64:
110  case MVT::v128i8:
111  case MVT::v64i16:
112  case MVT::v32i32:
113  case MVT::v16i64:
114  if (isAlignedMemNode(LD)) {
115  if (LD->isNonTemporal())
116  Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
117  else
118  Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
119  } else {
120  Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
121  }
122  break;
123  default:
124  llvm_unreachable("Unexpected memory type in indexed load");
125  }
126 
127  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
129  MemOp[0] = LD->getMemOperand();
130 
131  auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
132  -> MachineSDNode* {
133  if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
134  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
135  return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
136  Zero, SDValue(N, 0));
137  }
138  if (ExtType == ISD::SEXTLOAD)
139  return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
140  SDValue(N, 0));
141  return N;
142  };
143 
144  // Loaded value Next address Chain
145  SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
146  SDValue To[3];
147 
148  EVT ValueVT = LD->getValueType(0);
149  if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
150  // A load extending to i64 will actually produce i32, which will then
151  // need to be extended to i64.
152  assert(LoadedVT.getSizeInBits() <= 32);
153  ValueVT = MVT::i32;
154  }
155 
156  if (IsValidInc) {
157  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
158  MVT::i32, MVT::Other, Base,
159  IncV, Chain);
160  L->setMemRefs(MemOp, MemOp+1);
161  To[1] = SDValue(L, 1); // Next address.
162  To[2] = SDValue(L, 2); // Chain.
163  // Handle special case for extension to i64.
164  if (LD->getValueType(0) == MVT::i64)
165  L = getExt64(L, dl);
166  To[0] = SDValue(L, 0); // Loaded (extended) value.
167  } else {
168  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
169  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
170  Base, Zero, Chain);
171  L->setMemRefs(MemOp, MemOp+1);
172  To[2] = SDValue(L, 1); // Chain.
173  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
174  Base, IncV);
175  To[1] = SDValue(A, 0); // Next address.
176  // Handle special case for extension to i64.
177  if (LD->getValueType(0) == MVT::i64)
178  L = getExt64(L, dl);
179  To[0] = SDValue(L, 0); // Loaded (extended) value.
180  }
181  ReplaceUses(From, To, 3);
182  CurDAG->RemoveDeadNode(LD);
183 }
184 
186  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
187  return nullptr;
188 
189  SDLoc dl(IntN);
190  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
191 
192  static std::map<unsigned,unsigned> LoadPciMap = {
193  { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci },
194  { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
195  { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci },
196  { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
197  { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci },
198  { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci },
199  };
200  auto FLC = LoadPciMap.find(IntNo);
201  if (FLC != LoadPciMap.end()) {
202  EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
203  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
204  // Operands: { Base, Increment, Modifier, Chain }
205  auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
206  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
207  MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
208  { IntN->getOperand(2), I, IntN->getOperand(4),
209  IntN->getOperand(0) });
210  return Res;
211  }
212 
213  return nullptr;
214 }
215 
217  SDNode *IntN) {
218  // The "LoadN" is just a machine load instruction. The intrinsic also
219  // involves storing it. Generate an appropriate store to the location
220  // given in the intrinsic's operand(3).
221  uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
222  unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
224  unsigned Size = 1U << (SizeBits-1);
225 
226  SDLoc dl(IntN);
228  SDValue TS;
229  SDValue Loc = IntN->getOperand(3);
230 
231  if (Size >= 4)
232  TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
233  Size);
234  else
235  TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
236  PI, MVT::getIntegerVT(Size * 8), Size);
237 
238  SDNode *StoreN;
239  {
240  HandleSDNode Handle(TS);
241  SelectStore(TS.getNode());
242  StoreN = Handle.getValue().getNode();
243  }
244 
245  // Load's results are { Loaded value, Updated pointer, Chain }
246  ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
247  ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
248  return StoreN;
249 }
250 
252  // The intrinsics for load circ/brev perform two operations:
253  // 1. Load a value V from the specified location, using the addressing
254  // mode corresponding to the intrinsic.
255  // 2. Store V into a specified location. This location is typically a
256  // local, temporary object.
257  // In many cases, the program using these intrinsics will immediately
258  // load V again from the local object. In those cases, when certain
259  // conditions are met, the last load can be removed.
260  // This function identifies and optimizes this pattern. If the pattern
261  // cannot be optimized, it returns nullptr, which will cause the load
262  // to be selected separately from the intrinsic (which will be handled
263  // in SelectIntrinsicWChain).
264 
265  SDValue Ch = N->getOperand(0);
266  SDValue Loc = N->getOperand(1);
267 
268  // Assume that the load and the intrinsic are connected directly with a
269  // chain:
270  // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C
271  // t2: i32,ch = load t1:1, Loc, ...
272  SDNode *C = Ch.getNode();
273 
274  if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
275  return false;
276 
277  // The second load can only be eliminated if its extension type matches
278  // that of the load instruction corresponding to the intrinsic. The user
279  // can provide an address of an unsigned variable to store the result of
280  // a sign-extending intrinsic into (or the other way around).
281  ISD::LoadExtType IntExt;
282  switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
283  case Intrinsic::hexagon_circ_ldub:
284  case Intrinsic::hexagon_circ_lduh:
285  IntExt = ISD::ZEXTLOAD;
286  break;
287  case Intrinsic::hexagon_circ_ldw:
288  case Intrinsic::hexagon_circ_ldd:
289  IntExt = ISD::NON_EXTLOAD;
290  break;
291  default:
292  IntExt = ISD::SEXTLOAD;
293  break;
294  }
295  if (N->getExtensionType() != IntExt)
296  return false;
297 
298  // Make sure the target location for the loaded value in the load intrinsic
299  // is the location from which LD (or N) is loading.
300  if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
301  return false;
302 
305  SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
306  SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
307  ReplaceUses(F, T, array_lengthof(T));
308  // This transformation will leave the intrinsic dead. If it remains in
309  // the DAG, the selection code will see it again, but without the load,
310  // and it will generate a store that is normally required for it.
312  return true;
313  }
314  return false;
315 }
316 
317 // Convert the bit-reverse load intrinsic to appropriate target instruction.
319  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
320  return false;
321 
322  const SDLoc &dl(IntN);
323  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
324 
325  static const std::map<unsigned, unsigned> LoadBrevMap = {
326  { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
327  { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
328  { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
329  { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
330  { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
331  { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
332  };
333  auto FLI = LoadBrevMap.find(IntNo);
334  if (FLI != LoadBrevMap.end()) {
335  EVT ValTy =
336  (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
337  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
338  // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
339  // modifier}.
340  // Operands of target instruction: { Base, Modifier, Chain }.
342  FLI->second, dl, RTys,
343  {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
344 
346  MemOp[0] = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
347  Res->setMemRefs(MemOp, MemOp + 1);
348 
349  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
350  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
351  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
352  CurDAG->RemoveDeadNode(IntN);
353  return true;
354  }
355  return false;
356 }
357 
358 /// Generate a machine instruction node for the new circlar buffer intrinsics.
359 /// The new versions use a CSx register instead of the K field.
361  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
362  return false;
363 
364  SDLoc DL(IntN);
365  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
367 
368  static std::map<unsigned,unsigned> LoadNPcMap = {
369  { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
370  { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
371  { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
372  { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
373  { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
374  { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
375  { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
376  { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
377  { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
378  { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
379  { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
380  { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
381  };
382  auto FLI = LoadNPcMap.find (IntNo);
383  if (FLI != LoadNPcMap.end()) {
384  EVT ValTy = MVT::i32;
385  if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
386  IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
387  ValTy = MVT::i64;
388  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
389  // Handle load.*_pci case which has 6 operands.
390  if (IntN->getNumOperands() == 6) {
391  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
392  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
393  // Operands: { Base, Increment, Modifier, Start, Chain }.
394  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
395  IntN->getOperand(0) };
396  } else
397  // Handle load.*_pcr case which has 5 operands.
398  // Operands: { Base, Modifier, Start, Chain }.
399  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
400  IntN->getOperand(0) };
401  MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
402  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
403  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
404  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
405  CurDAG->RemoveDeadNode(IntN);
406  return true;
407  }
408 
409  static std::map<unsigned,unsigned> StoreNPcMap = {
410  { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
411  { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
412  { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
413  { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
414  { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
415  { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
416  { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
417  { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
418  { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
419  { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
420  };
421  auto FSI = StoreNPcMap.find (IntNo);
422  if (FSI != StoreNPcMap.end()) {
423  EVT RTys[] = { MVT::i32, MVT::Other };
424  // Handle store.*_pci case which has 7 operands.
425  if (IntN->getNumOperands() == 7) {
426  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
427  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
428  // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
429  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
430  IntN->getOperand(6), IntN->getOperand(0) };
431  } else
432  // Handle store.*_pcr case which has 6 operands.
433  // Operands: { Base, Modifier, Value, Start, Chain }.
434  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
435  IntN->getOperand(5), IntN->getOperand(0) };
436  MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
437  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
438  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
439  CurDAG->RemoveDeadNode(IntN);
440  return true;
441  }
442 
443  return false;
444 }
445 
447  SDLoc dl(N);
448  LoadSDNode *LD = cast<LoadSDNode>(N);
449 
450  // Handle indexed loads.
452  if (AM != ISD::UNINDEXED) {
453  SelectIndexedLoad(LD, dl);
454  return;
455  }
456 
457  // Handle patterns using circ/brev load intrinsics.
458  if (tryLoadOfLoadIntrinsic(LD))
459  return;
460 
461  SelectCode(LD);
462 }
463 
465  SDValue Chain = ST->getChain();
466  SDValue Base = ST->getBasePtr();
467  SDValue Offset = ST->getOffset();
468  SDValue Value = ST->getValue();
469  // Get the constant value.
470  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
471  EVT StoredVT = ST->getMemoryVT();
472  EVT ValueVT = Value.getValueType();
473 
474  bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
475  unsigned Opcode = 0;
476 
477  assert(StoredVT.isSimple());
478  switch (StoredVT.getSimpleVT().SimpleTy) {
479  case MVT::i8:
480  Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
481  break;
482  case MVT::i16:
483  Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
484  break;
485  case MVT::i32:
486  case MVT::v2i16:
487  case MVT::v4i8:
488  Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
489  break;
490  case MVT::i64:
491  case MVT::v2i32:
492  case MVT::v4i16:
493  case MVT::v8i8:
494  Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
495  break;
496  case MVT::v64i8:
497  case MVT::v32i16:
498  case MVT::v16i32:
499  case MVT::v8i64:
500  case MVT::v128i8:
501  case MVT::v64i16:
502  case MVT::v32i32:
503  case MVT::v16i64:
504  if (isAlignedMemNode(ST)) {
505  if (ST->isNonTemporal())
506  Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
507  else
508  Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
509  } else {
510  Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
511  }
512  break;
513  default:
514  llvm_unreachable("Unexpected memory type in indexed store");
515  }
516 
517  if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
518  assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
519  Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
520  dl, MVT::i32, Value);
521  }
522 
523  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
525  MemOp[0] = ST->getMemOperand();
526 
527  // Next address Chain
528  SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
529  SDValue To[2];
530 
531  if (IsValidInc) {
532  // Build post increment store.
533  SDValue Ops[] = { Base, IncV, Value, Chain };
535  Ops);
536  S->setMemRefs(MemOp, MemOp + 1);
537  To[0] = SDValue(S, 0);
538  To[1] = SDValue(S, 1);
539  } else {
540  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
541  SDValue Ops[] = { Base, Zero, Value, Chain };
542  MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
543  S->setMemRefs(MemOp, MemOp + 1);
544  To[1] = SDValue(S, 0);
545  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
546  Base, IncV);
547  To[0] = SDValue(A, 0);
548  }
549 
550  ReplaceUses(From, To, 2);
551  CurDAG->RemoveDeadNode(ST);
552 }
553 
555  SDLoc dl(N);
556  StoreSDNode *ST = cast<StoreSDNode>(N);
557 
558  // Handle indexed stores.
560  if (AM != ISD::UNINDEXED) {
561  SelectIndexedStore(ST, dl);
562  return;
563  }
564 
565  SelectCode(ST);
566 }
567 
569  SDLoc dl(N);
570  SDValue Shl_0 = N->getOperand(0);
571  SDValue Shl_1 = N->getOperand(1);
572 
573  auto Default = [this,N] () -> void { SelectCode(N); };
574 
575  if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
576  return Default();
577 
578  // RHS is const.
579  int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
580 
581  if (Shl_0.getOpcode() == ISD::MUL) {
582  SDValue Mul_0 = Shl_0.getOperand(0); // Val
583  SDValue Mul_1 = Shl_0.getOperand(1); // Const
584  // RHS of mul is const.
585  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
586  int32_t ValConst = C->getSExtValue() << ShlConst;
587  if (isInt<9>(ValConst)) {
588  SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
589  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
590  MVT::i32, Mul_0, Val);
591  ReplaceNode(N, Result);
592  return;
593  }
594  }
595  return Default();
596  }
597 
598  if (Shl_0.getOpcode() == ISD::SUB) {
599  SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
600  SDValue Sub_1 = Shl_0.getOperand(1); // Val
601  if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
602  if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
603  return Default();
604  SDValue Shl2_0 = Sub_1.getOperand(0); // Val
605  SDValue Shl2_1 = Sub_1.getOperand(1); // Const
606  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
607  int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
608  if (isInt<9>(-ValConst)) {
609  SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
610  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
611  MVT::i32, Shl2_0, Val);
612  ReplaceNode(N, Result);
613  return;
614  }
615  }
616  }
617  }
618 
619  return Default();
620 }
621 
622 //
623 // Handling intrinsics for circular load and bitreverse load.
624 //
629  return;
630  }
631 
632  // Handle bit-reverse load intrinsics.
633  if (SelectBrevLdIntrinsic(N))
634  return;
635 
636  if (SelectNewCircIntrinsic(N))
637  return;
638 
639  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
640  if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
641  IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
642  IntNo == Intrinsic::hexagon_V6_vgathermh ||
643  IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
644  IntNo == Intrinsic::hexagon_V6_vgathermhw ||
645  IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
646  SelectV65Gather(N);
647  return;
648  }
649  if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
650  IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
651  IntNo == Intrinsic::hexagon_V6_vgathermhq ||
652  IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
653  IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
654  IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
656  return;
657  }
658 
659  SelectCode(N);
660 }
661 
663  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
664  unsigned Bits;
665  switch (IID) {
666  case Intrinsic::hexagon_S2_vsplatrb:
667  Bits = 8;
668  break;
669  case Intrinsic::hexagon_S2_vsplatrh:
670  Bits = 16;
671  break;
672  case Intrinsic::hexagon_V6_vaddcarry:
673  case Intrinsic::hexagon_V6_vaddcarry_128B:
674  case Intrinsic::hexagon_V6_vsubcarry:
675  case Intrinsic::hexagon_V6_vsubcarry_128B:
677  return;
678  default:
679  SelectCode(N);
680  return;
681  }
682 
683  SDValue V = N->getOperand(1);
684  SDValue U;
685  if (keepsLowBits(V, Bits, U)) {
686  SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
687  N->getOperand(0), U);
688  ReplaceNode(N, R.getNode());
689  SelectCode(R.getNode());
690  return;
691  }
692  SelectCode(N);
693 }
694 
695 //
696 // Map floating point constant values.
697 //
699  SDLoc dl(N);
701  APInt A = CN->getValueAPF().bitcastToAPInt();
702  if (N->getValueType(0) == MVT::f32) {
703  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
704  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
705  return;
706  }
707  if (N->getValueType(0) == MVT::f64) {
708  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
709  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
710  return;
711  }
712 
713  SelectCode(N);
714 }
715 
716 //
717 // Map boolean values.
718 //
720  if (N->getValueType(0) == MVT::i1) {
721  assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1));
722  unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
723  ? Hexagon::PS_true
724  : Hexagon::PS_false;
726  return;
727  }
728 
729  SelectCode(N);
730 }
731 
733  MachineFrameInfo &MFI = MF->getFrameInfo();
734  const HexagonFrameLowering *HFI = HST->getFrameLowering();
735  int FX = cast<FrameIndexSDNode>(N)->getIndex();
736  unsigned StkA = HFI->getStackAlignment();
737  unsigned MaxA = MFI.getMaxAlignment();
739  SDLoc DL(N);
740  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
741  SDNode *R = nullptr;
742 
743  // Use PS_fi when:
744  // - the object is fixed, or
745  // - there are no objects with higher-than-default alignment, or
746  // - there are no dynamically allocated objects.
747  // Otherwise, use PS_fia.
748  if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
749  R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
750  } else {
751  auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
752  unsigned AR = HMFI.getStackAlignBaseVReg();
753  SDValue CH = CurDAG->getEntryNode();
754  SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
755  R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
756  }
757 
758  ReplaceNode(N, R);
759 }
760 
761 
763  EVT SVT = N->getOperand(0).getValueType();
764  EVT DVT = N->getValueType(0);
765  if (!SVT.isVector() || !DVT.isVector() ||
766  SVT.getVectorElementType() == MVT::i1 ||
767  DVT.getVectorElementType() == MVT::i1 ||
768  SVT.getSizeInBits() != DVT.getSizeInBits()) {
769  SelectCode(N);
770  return;
771  }
772 
773  ReplaceUses(SDValue(N, 0), N->getOperand(0));
775 }
776 
778  MVT ResTy = N->getValueType(0).getSimpleVT();
779  if (HST->isHVXVectorType(ResTy, true))
780  return SelectHvxVAlign(N);
781 
782  const SDLoc &dl(N);
783  unsigned VecLen = ResTy.getSizeInBits();
784  if (VecLen == 32) {
785  SDValue Ops[] = {
786  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
787  N->getOperand(0),
788  CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
789  N->getOperand(1),
790  CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
791  };
792  SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
793  MVT::i64, Ops);
794 
795  // Shift right by "(Addr & 0x3) * 8" bytes.
796  SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
797  SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
798  SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
799  M0, N->getOperand(2), M1);
800  SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
801  SDValue(R, 0), SDValue(C, 0));
802  SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
803  SDValue(S, 0));
804  ReplaceNode(N, E.getNode());
805  } else {
806  assert(VecLen == 64);
807  SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
808  N->getOperand(2));
809  SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
810  N->getOperand(0), N->getOperand(1),
811  SDValue(Pu,0));
812  ReplaceNode(N, VA);
813  }
814 }
815 
817  const SDLoc &dl(N);
818  SDValue A = N->getOperand(1);
819  int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
820  assert(isPowerOf2_32(-Mask));
821 
822  SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
823  SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
824  N->getOperand(0), M);
825  ReplaceNode(N, AA);
826 }
827 
828 // Handle these nodes here to avoid having to write patterns for all
829 // combinations of input/output types. In all cases, the resulting
830 // instruction is the same.
832  SDValue Op = N->getOperand(0);
833  MVT OpTy = Op.getValueType().getSimpleVT();
834  SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
835  CurDAG->getVTList(OpTy), {Op});
836  ReplaceNode(T, Op.getNode());
837 }
838 
840  MVT ResTy = N->getValueType(0).getSimpleVT();
841  SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
842  N->getOperand(0));
843  ReplaceNode(N, T);
844 }
845 
847  const SDLoc &dl(N);
848  MVT ResTy = N->getValueType(0).getSimpleVT();
849  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
850  SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
851  N->getOperand(0), Zero);
852  ReplaceNode(N, T);
853 }
854 
856  const SDLoc &dl(N);
857  MVT ResTy = N->getValueType(0).getSimpleVT();
858 
860  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
861  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
862  N->getOperand(0), SDValue(R,0));
863  ReplaceNode(N, T);
864 }
865 
867  const SDLoc &dl(N);
868  MVT ResTy = N->getValueType(0).getSimpleVT();
869 
871  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
872  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
873  N->getOperand(0), SDValue(R,0));
874  ReplaceNode(N, T);
875 }
876 
878  if (N->isMachineOpcode())
879  return N->setNodeId(-1); // Already selected.
880 
881  switch (N->getOpcode()) {
882  case ISD::Constant: return SelectConstant(N);
883  case ISD::ConstantFP: return SelectConstantFP(N);
884  case ISD::FrameIndex: return SelectFrameIndex(N);
885  case ISD::BITCAST: return SelectBitcast(N);
886  case ISD::SHL: return SelectSHL(N);
887  case ISD::LOAD: return SelectLoad(N);
888  case ISD::STORE: return SelectStore(N);
891  case HexagonISD::VALIGN: return SelectVAlign(N);
893  case HexagonISD::TYPECAST: return SelectTypecast(N);
894  case HexagonISD::P2D: return SelectP2D(N);
895  case HexagonISD::D2P: return SelectD2P(N);
896  case HexagonISD::Q2V: return SelectQ2V(N);
897  case HexagonISD::V2Q: return SelectV2Q(N);
898  }
899 
900  if (HST->useHVXOps()) {
901  switch (N->getOpcode()) {
902  case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N);
903  case HexagonISD::VROR: return SelectHvxRor(N);
904  }
905  }
906 
907  SelectCode(N);
908 }
909 
911 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
912  std::vector<SDValue> &OutOps) {
913  SDValue Inp = Op, Res;
914 
915  switch (ConstraintID) {
916  default:
917  return true;
919  case InlineAsm::Constraint_o: // Offsetable.
920  case InlineAsm::Constraint_v: // Not offsetable.
921  case InlineAsm::Constraint_m: // Memory.
922  if (SelectAddrFI(Inp, Res))
923  OutOps.push_back(Res);
924  else
925  OutOps.push_back(Inp);
926  break;
927  }
928 
929  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
930  return false;
931 }
932 
933 
934 static bool isMemOPCandidate(SDNode *I, SDNode *U) {
935  // I is an operand of U. Check if U is an arithmetic (binary) operation
936  // usable in a memop, where the other operand is a loaded value, and the
937  // result of U is stored in the same location.
938 
939  if (!U->hasOneUse())
940  return false;
941  unsigned Opc = U->getOpcode();
942  switch (Opc) {
943  case ISD::ADD:
944  case ISD::SUB:
945  case ISD::AND:
946  case ISD::OR:
947  break;
948  default:
949  return false;
950  }
951 
952  SDValue S0 = U->getOperand(0);
953  SDValue S1 = U->getOperand(1);
954  SDValue SY = (S0.getNode() == I) ? S1 : S0;
955 
956  SDNode *UUse = *U->use_begin();
957  if (UUse->getNumValues() != 1)
958  return false;
959 
960  // Check if one of the inputs to U is a load instruction and the output
961  // is used by a store instruction. If so and they also have the same
962  // base pointer, then don't preoprocess this node sequence as it
963  // can be matched to a memop.
964  SDNode *SYNode = SY.getNode();
965  if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
966  SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
967  SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
968  if (LDBasePtr == STBasePtr)
969  return true;
970  }
971  return false;
972 }
973 
974 
975 // Transform: (or (select c x 0) z) -> (select c (or x z) z)
976 // (or (select c 0 y) z) -> (select c z (or y z))
977 void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
978  SelectionDAG &DAG = *CurDAG;
979 
980  for (auto I : Nodes) {
981  if (I->getOpcode() != ISD::OR)
982  continue;
983 
984  auto IsZero = [] (const SDValue &V) -> bool {
985  if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
986  return SC->isNullValue();
987  return false;
988  };
989  auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
990  if (Op.getOpcode() != ISD::SELECT)
991  return false;
992  return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
993  };
994 
995  SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
996  EVT VT = I->getValueType(0);
997  bool SelN0 = IsSelect0(N0);
998  SDValue SOp = SelN0 ? N0 : N1;
999  SDValue VOp = SelN0 ? N1 : N0;
1000 
1001  if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
1002  SDValue SC = SOp.getOperand(0);
1003  SDValue SX = SOp.getOperand(1);
1004  SDValue SY = SOp.getOperand(2);
1005  SDLoc DLS = SOp;
1006  if (IsZero(SY)) {
1007  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1008  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1009  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1010  } else if (IsZero(SX)) {
1011  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1012  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1013  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1014  }
1015  }
1016  }
1017 }
1018 
1019 // Transform: (store ch val (add x (add (shl y c) e)))
1020 // to: (store ch val (add x (shl (add y d) c))),
1021 // where e = (shl d c) for some integer d.
1022 // The purpose of this is to enable generation of loads/stores with
1023 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1024 // value c must be 0, 1 or 2.
1025 void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1026  SelectionDAG &DAG = *CurDAG;
1027 
1028  for (auto I : Nodes) {
1029  if (I->getOpcode() != ISD::STORE)
1030  continue;
1031 
1032  // I matched: (store ch val Off)
1033  SDValue Off = I->getOperand(2);
1034  // Off needs to match: (add x (add (shl y c) (shl d c))))
1035  if (Off.getOpcode() != ISD::ADD)
1036  continue;
1037  // Off matched: (add x T0)
1038  SDValue T0 = Off.getOperand(1);
1039  // T0 needs to match: (add T1 T2):
1040  if (T0.getOpcode() != ISD::ADD)
1041  continue;
1042  // T0 matched: (add T1 T2)
1043  SDValue T1 = T0.getOperand(0);
1044  SDValue T2 = T0.getOperand(1);
1045  // T1 needs to match: (shl y c)
1046  if (T1.getOpcode() != ISD::SHL)
1047  continue;
1048  SDValue C = T1.getOperand(1);
1050  if (CN == nullptr)
1051  continue;
1052  unsigned CV = CN->getZExtValue();
1053  if (CV > 2)
1054  continue;
1055  // T2 needs to match e, where e = (shl d c) for some d.
1057  if (EN == nullptr)
1058  continue;
1059  unsigned EV = EN->getZExtValue();
1060  if (EV % (1 << CV) != 0)
1061  continue;
1062  unsigned DV = EV / (1 << CV);
1063 
1064  // Replace T0 with: (shl (add y d) c)
1065  SDLoc DL = SDLoc(I);
1066  EVT VT = T0.getValueType();
1067  SDValue D = DAG.getConstant(DV, DL, VT);
1068  // NewAdd = (add y d)
1069  SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1070  // NewShl = (shl NewAdd c)
1071  SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1072  ReplaceNode(T0.getNode(), NewShl.getNode());
1073  }
1074 }
1075 
1076 // Transform: (load ch (add x (and (srl y c) Mask)))
1077 // to: (load ch (add x (shl (srl y d) d-c)))
1078 // where
1079 // Mask = 00..0 111..1 0.0
1080 // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1081 // | +-------- 1s
1082 // +-------------- at most c 0s
1083 // Motivating example:
1084 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1085 // to (add x (and (srl y 3) 1FFFFFFC))
1086 // which results in a constant-extended and(##...,lsr). This transformation
1087 // undoes this simplification for cases where the shl can be folded into
1088 // an addressing mode.
1089 void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1090  SelectionDAG &DAG = *CurDAG;
1091 
1092  for (SDNode *N : Nodes) {
1093  unsigned Opc = N->getOpcode();
1094  if (Opc != ISD::LOAD && Opc != ISD::STORE)
1095  continue;
1096  SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1097  // Addr must match: (add x T0)
1098  if (Addr.getOpcode() != ISD::ADD)
1099  continue;
1100  SDValue T0 = Addr.getOperand(1);
1101  // T0 must match: (and T1 Mask)
1102  if (T0.getOpcode() != ISD::AND)
1103  continue;
1104 
1105  // We have an AND.
1106  //
1107  // Check the first operand. It must be: (srl y c).
1108  SDValue S = T0.getOperand(0);
1109  if (S.getOpcode() != ISD::SRL)
1110  continue;
1112  if (SN == nullptr)
1113  continue;
1114  if (SN->getAPIntValue().getBitWidth() != 32)
1115  continue;
1116  uint32_t CV = SN->getZExtValue();
1117 
1118  // Check the second operand: the supposed mask.
1120  if (MN == nullptr)
1121  continue;
1122  if (MN->getAPIntValue().getBitWidth() != 32)
1123  continue;
1124  uint32_t Mask = MN->getZExtValue();
1125  // Examine the mask.
1126  uint32_t TZ = countTrailingZeros(Mask);
1127  uint32_t M1 = countTrailingOnes(Mask >> TZ);
1128  uint32_t LZ = countLeadingZeros(Mask);
1129  // Trailing zeros + middle ones + leading zeros must equal the width.
1130  if (TZ + M1 + LZ != 32)
1131  continue;
1132  // The number of trailing zeros will be encoded in the addressing mode.
1133  if (TZ > 2)
1134  continue;
1135  // The number of leading zeros must be at most c.
1136  if (LZ > CV)
1137  continue;
1138 
1139  // All looks good.
1140  SDValue Y = S.getOperand(0);
1141  EVT VT = Addr.getValueType();
1142  SDLoc dl(S);
1143  // TZ = D-C, so D = TZ+C.
1144  SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1145  SDValue DC = DAG.getConstant(TZ, dl, VT);
1146  SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1147  SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1148  ReplaceNode(T0.getNode(), NewShl.getNode());
1149  }
1150 }
1151 
1152 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1153 // (op ... 1 ...))
1154 void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1155  SelectionDAG &DAG = *CurDAG;
1156 
1157  for (SDNode *N : Nodes) {
1158  unsigned Opc = N->getOpcode();
1159  if (Opc != ISD::ZERO_EXTEND)
1160  continue;
1161  SDValue OpI1 = N->getOperand(0);
1162  EVT OpVT = OpI1.getValueType();
1163  if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1164  continue;
1165  for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
1166  SDNode *U = *I;
1167  if (U->getNumValues() != 1)
1168  continue;
1169  EVT UVT = U->getValueType(0);
1170  if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1171  continue;
1172  if (isMemOPCandidate(N, U))
1173  continue;
1174 
1175  // Potentially simplifiable operation.
1176  unsigned I1N = I.getOperandNo();
1178  for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1179  Ops[i] = U->getOperand(i);
1180  EVT BVT = Ops[I1N].getValueType();
1181 
1182  SDLoc dl(U);
1183  SDValue C0 = DAG.getConstant(0, dl, BVT);
1184  SDValue C1 = DAG.getConstant(1, dl, BVT);
1185  SDValue If0, If1;
1186 
1187  if (isa<MachineSDNode>(U)) {
1188  unsigned UseOpc = U->getMachineOpcode();
1189  Ops[I1N] = C0;
1190  If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1191  Ops[I1N] = C1;
1192  If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1193  } else {
1194  unsigned UseOpc = U->getOpcode();
1195  Ops[I1N] = C0;
1196  If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1197  Ops[I1N] = C1;
1198  If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1199  }
1200  SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
1201  DAG.ReplaceAllUsesWith(U, Sel.getNode());
1202  }
1203  }
1204 }
1205 
1207  // Repack all nodes before calling each preprocessing function,
1208  // because each of them can modify the set of nodes.
1209  auto getNodes = [this] () -> std::vector<SDNode*> {
1210  std::vector<SDNode*> T;
1211  T.reserve(CurDAG->allnodes_size());
1212  for (SDNode &N : CurDAG->allnodes())
1213  T.push_back(&N);
1214  return T;
1215  };
1216 
1217  // Transform: (or (select c x 0) z) -> (select c (or x z) z)
1218  // (or (select c 0 y) z) -> (select c z (or y z))
1219  ppSimplifyOrSelect0(getNodes());
1220 
1221  // Transform: (store ch val (add x (add (shl y c) e)))
1222  // to: (store ch val (add x (shl (add y d) c))),
1223  // where e = (shl d c) for some integer d.
1224  // The purpose of this is to enable generation of loads/stores with
1225  // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1226  // value c must be 0, 1 or 2.
1227  ppAddrReorderAddShl(getNodes());
1228 
1229  // Transform: (load ch (add x (and (srl y c) Mask)))
1230  // to: (load ch (add x (shl (srl y d) d-c)))
1231  // where
1232  // Mask = 00..0 111..1 0.0
1233  // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1234  // | +-------- 1s
1235  // +-------------- at most c 0s
1236  // Motivating example:
1237  // DAG combiner optimizes (add x (shl (srl y 5) 2))
1238  // to (add x (and (srl y 3) 1FFFFFFC))
1239  // which results in a constant-extended and(##...,lsr). This transformation
1240  // undoes this simplification for cases where the shl can be folded into
1241  // an addressing mode.
1242  ppAddrRewriteAndSrl(getNodes());
1243 
1244  // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1245  // (op ... 1 ...))
1246  ppHoistZextI1(getNodes());
1247 
1248  DEBUG_WITH_TYPE("isel", {
1249  dbgs() << "Preprocessed (Hexagon) selection DAG:";
1250  CurDAG->dump();
1251  });
1252 
1254  rebalanceAddressTrees();
1255 
1256  DEBUG_WITH_TYPE("isel", {
1257  dbgs() << "Address tree balanced selection DAG:";
1258  CurDAG->dump();
1259  });
1260  }
1261 }
1262 
1264  auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
1265  auto &HFI = *HST.getFrameLowering();
1266  if (!HFI.needsAligna(*MF))
1267  return;
1268 
1269  MachineFrameInfo &MFI = MF->getFrameInfo();
1270  MachineBasicBlock *EntryBB = &MF->front();
1271  unsigned AR = FuncInfo->CreateReg(MVT::i32);
1272  unsigned MaxA = MFI.getMaxAlignment();
1273  BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
1274  .addImm(MaxA);
1275  MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
1276 }
1277 
1278 // Match a frame index that can be used in an addressing mode.
1280  if (N.getOpcode() != ISD::FrameIndex)
1281  return false;
1282  auto &HFI = *HST->getFrameLowering();
1283  MachineFrameInfo &MFI = MF->getFrameInfo();
1284  int FX = cast<FrameIndexSDNode>(N)->getIndex();
1285  if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1286  return false;
1288  return true;
1289 }
1290 
1292  return SelectGlobalAddress(N, R, false, 0);
1293 }
1294 
1296  return SelectGlobalAddress(N, R, true, 0);
1297 }
1298 
1300  return SelectAnyImmediate(N, R, 0);
1301 }
1302 
1304  return SelectAnyImmediate(N, R, 0);
1305 }
1307  return SelectAnyImmediate(N, R, 1);
1308 }
1310  return SelectAnyImmediate(N, R, 2);
1311 }
1313  return SelectAnyImmediate(N, R, 3);
1314 }
1315 
1317  EVT T = N.getValueType();
1318  if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1319  return false;
1320  R = N;
1321  return true;
1322 }
1323 
1325  uint32_t LogAlign) {
1326  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1327  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1328  };
1329 
1330  switch (N.getOpcode()) {
1331  case ISD::Constant: {
1332  if (N.getValueType() != MVT::i32)
1333  return false;
1334  int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1335  if (!IsAligned(V))
1336  return false;
1337  R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1338  return true;
1339  }
1340  case HexagonISD::JT:
1341  case HexagonISD::CP:
1342  // These are assumed to always be aligned at least 8-byte boundary.
1343  if (LogAlign > 3)
1344  return false;
1345  R = N.getOperand(0);
1346  return true;
1347  case ISD::ExternalSymbol:
1348  // Symbols may be aligned at any boundary.
1349  if (LogAlign > 0)
1350  return false;
1351  R = N;
1352  return true;
1353  case ISD::BlockAddress:
1354  // Block address is always aligned at least 4-byte boundary.
1355  if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset()))
1356  return false;
1357  R = N;
1358  return true;
1359  }
1360 
1361  if (SelectGlobalAddress(N, R, false, LogAlign) ||
1362  SelectGlobalAddress(N, R, true, LogAlign))
1363  return true;
1364 
1365  return false;
1366 }
1367 
1369  bool UseGP, uint32_t LogAlign) {
1370  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1371  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1372  };
1373 
1374  switch (N.getOpcode()) {
1375  case ISD::ADD: {
1376  SDValue N0 = N.getOperand(0);
1377  SDValue N1 = N.getOperand(1);
1378  unsigned GAOpc = N0.getOpcode();
1379  if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1380  return false;
1381  if (!UseGP && GAOpc != HexagonISD::CONST32)
1382  return false;
1383  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1384  SDValue Addr = N0.getOperand(0);
1385  // For the purpose of alignment, sextvalue and zextvalue are the same.
1386  if (!IsAligned(Const->getZExtValue()))
1387  return false;
1388  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1389  if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1390  uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1391  R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1392  N.getValueType(), NewOff);
1393  return true;
1394  }
1395  }
1396  }
1397  break;
1398  }
1399  case HexagonISD::CP:
1400  case HexagonISD::JT:
1401  case HexagonISD::CONST32:
1402  // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1403  // want in the instruction.
1404  if (!UseGP)
1405  R = N.getOperand(0);
1406  return !UseGP;
1408  if (UseGP)
1409  R = N.getOperand(0);
1410  return UseGP;
1411  default:
1412  return false;
1413  }
1414 
1415  return false;
1416 }
1417 
1419  // This (complex pattern) function is meant to detect a sign-extension
1420  // i32->i64 on a per-operand basis. This would allow writing single
1421  // patterns that would cover a number of combinations of different ways
1422  // a sign-extensions could be written. For example:
1423  // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1424  // could match either one of these:
1425  // (mul (sext x) (sext_inreg y))
1426  // (mul (sext-load *p) (sext_inreg y))
1427  // (mul (sext_inreg x) (sext y))
1428  // etc.
1429  //
1430  // The returned value will have type i64 and its low word will
1431  // contain the value being extended. The high bits are not specified.
1432  // The returned type is i64 because the original type of N was i64,
1433  // but the users of this function should only use the low-word of the
1434  // result, e.g.
1435  // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1436 
1437  if (N.getValueType() != MVT::i64)
1438  return false;
1439  unsigned Opc = N.getOpcode();
1440  switch (Opc) {
1441  case ISD::SIGN_EXTEND:
1442  case ISD::SIGN_EXTEND_INREG: {
1443  // sext_inreg has the source type as a separate operand.
1444  EVT T = Opc == ISD::SIGN_EXTEND
1445  ? N.getOperand(0).getValueType()
1446  : cast<VTSDNode>(N.getOperand(1))->getVT();
1447  unsigned SW = T.getSizeInBits();
1448  if (SW == 32)
1449  R = N.getOperand(0);
1450  else if (SW < 32)
1451  R = N;
1452  else
1453  return false;
1454  break;
1455  }
1456  case ISD::LOAD: {
1457  LoadSDNode *L = cast<LoadSDNode>(N);
1458  if (L->getExtensionType() != ISD::SEXTLOAD)
1459  return false;
1460  // All extending loads extend to i32, so even if the value in
1461  // memory is shorter than 32 bits, it will be i32 after the load.
1462  if (L->getMemoryVT().getSizeInBits() > 32)
1463  return false;
1464  R = N;
1465  break;
1466  }
1467  case ISD::SRA: {
1468  auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1469  if (!S || S->getZExtValue() != 32)
1470  return false;
1471  R = N;
1472  break;
1473  }
1474  default:
1475  return false;
1476  }
1477  EVT RT = R.getValueType();
1478  if (RT == MVT::i64)
1479  return true;
1480  assert(RT == MVT::i32);
1481  // This is only to produce a value of type i64. Do not rely on the
1482  // high bits produced by this.
1483  const SDLoc &dl(N);
1484  SDValue Ops[] = {
1485  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1486  R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1487  R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1488  };
1489  SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1490  MVT::i64, Ops);
1491  R = SDValue(T, 0);
1492  return true;
1493 }
1494 
1495 bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1496  SDValue &Src) {
1497  unsigned Opc = Val.getOpcode();
1498  switch (Opc) {
1499  case ISD::SIGN_EXTEND:
1500  case ISD::ZERO_EXTEND:
1501  case ISD::ANY_EXTEND: {
1502  const SDValue &Op0 = Val.getOperand(0);
1503  EVT T = Op0.getValueType();
1504  if (T.isInteger() && T.getSizeInBits() == NumBits) {
1505  Src = Op0;
1506  return true;
1507  }
1508  break;
1509  }
1511  case ISD::AssertSext:
1512  case ISD::AssertZext:
1513  if (Val.getOperand(0).getValueType().isInteger()) {
1514  VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1515  if (T->getVT().getSizeInBits() == NumBits) {
1516  Src = Val.getOperand(0);
1517  return true;
1518  }
1519  }
1520  break;
1521  case ISD::AND: {
1522  // Check if this is an AND with NumBits of lower bits set to 1.
1523  uint64_t Mask = (1 << NumBits) - 1;
1524  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1525  if (C->getZExtValue() == Mask) {
1526  Src = Val.getOperand(1);
1527  return true;
1528  }
1529  }
1530  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1531  if (C->getZExtValue() == Mask) {
1532  Src = Val.getOperand(0);
1533  return true;
1534  }
1535  }
1536  break;
1537  }
1538  case ISD::OR:
1539  case ISD::XOR: {
1540  // OR/XOR with the lower NumBits bits set to 0.
1541  uint64_t Mask = (1 << NumBits) - 1;
1542  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1543  if ((C->getZExtValue() & Mask) == 0) {
1544  Src = Val.getOperand(1);
1545  return true;
1546  }
1547  }
1548  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1549  if ((C->getZExtValue() & Mask) == 0) {
1550  Src = Val.getOperand(0);
1551  return true;
1552  }
1553  }
1554  }
1555  default:
1556  break;
1557  }
1558  return false;
1559 }
1560 
1561 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1562  return N->getAlignment() >= N->getMemoryVT().getStoreSize();
1563 }
1564 
1565 bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1566  unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1567  switch (N->getMemoryVT().getStoreSize()) {
1568  case 1:
1569  return StackSize <= 56; // 1*2^6 - 8
1570  case 2:
1571  return StackSize <= 120; // 2*2^6 - 8
1572  case 4:
1573  return StackSize <= 248; // 4*2^6 - 8
1574  default:
1575  return false;
1576  }
1577 }
1578 
1579 // Return true when the given node fits in a positive half word.
1580 bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1581  if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1582  int64_t V = CN->getSExtValue();
1583  return V > 0 && isInt<16>(V);
1584  }
1585  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1586  const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1587  return VN->getVT().getSizeInBits() <= 16;
1588  }
1589  return false;
1590 }
1591 
1592 bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1593  return !CheckSingleUse || N->hasOneUse();
1594 }
1595 
1596 ////////////////////////////////////////////////////////////////////////////////
1597 // Rebalancing of address calculation trees
1598 
1599 static bool isOpcodeHandled(const SDNode *N) {
1600  switch (N->getOpcode()) {
1601  case ISD::ADD:
1602  case ISD::MUL:
1603  return true;
1604  case ISD::SHL:
1605  // We only handle constant shifts because these can be easily flattened
1606  // into multiplications by 2^Op1.
1607  return isa<ConstantSDNode>(N->getOperand(1).getNode());
1608  default:
1609  return false;
1610  }
1611 }
1612 
1613 /// \brief Return the weight of an SDNode
1614 int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1615  if (!isOpcodeHandled(N))
1616  return 1;
1617  assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1618  assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1619  assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1620  return RootWeights[N];
1621 }
1622 
1623 int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1624  if (!isOpcodeHandled(N))
1625  return 0;
1626  assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1627  "Cannot query height of unvisited/RAUW'd node!");
1628  return RootHeights[N];
1629 }
1630 
1631 namespace {
1632 struct WeightedLeaf {
1633  SDValue Value;
1634  int Weight;
1635  int InsertionOrder;
1636 
1637  WeightedLeaf() : Value(SDValue()) { }
1638 
1639  WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1640  Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1641  assert(Weight >= 0 && "Weight must be >= 0");
1642  }
1643 
1644  static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1645  assert(A.Value.getNode() && B.Value.getNode());
1646  return A.Weight == B.Weight ?
1647  (A.InsertionOrder > B.InsertionOrder) :
1648  (A.Weight > B.Weight);
1649  }
1650 };
1651 
1652 /// A specialized priority queue for WeigthedLeaves. It automatically folds
1653 /// constants and allows removal of non-top elements while maintaining the
1654 /// priority order.
1655 class LeafPrioQueue {
1657  bool HaveConst;
1658  WeightedLeaf ConstElt;
1659  unsigned Opcode;
1660 
1661 public:
1662  bool empty() {
1663  return (!HaveConst && Q.empty());
1664  }
1665 
1666  size_t size() {
1667  return Q.size() + HaveConst;
1668  }
1669 
1670  bool hasConst() {
1671  return HaveConst;
1672  }
1673 
1674  const WeightedLeaf &top() {
1675  if (HaveConst)
1676  return ConstElt;
1677  return Q.front();
1678  }
1679 
1680  WeightedLeaf pop() {
1681  if (HaveConst) {
1682  HaveConst = false;
1683  return ConstElt;
1684  }
1685  std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1686  return Q.pop_back_val();
1687  }
1688 
1689  void push(WeightedLeaf L, bool SeparateConst=true) {
1690  if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1691  if (Opcode == ISD::MUL &&
1692  cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1693  return;
1694  if (Opcode == ISD::ADD &&
1695  cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1696  return;
1697 
1698  HaveConst = true;
1699  ConstElt = L;
1700  } else {
1701  Q.push_back(L);
1702  std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1703  }
1704  }
1705 
1706  /// Push L to the bottom of the queue regardless of its weight. If L is
1707  /// constant, it will not be folded with other constants in the queue.
1708  void pushToBottom(WeightedLeaf L) {
1709  L.Weight = 1000;
1710  push(L, false);
1711  }
1712 
1713  /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1714  /// lowest weight and remove it from the queue.
1715  WeightedLeaf findSHL(uint64_t MaxAmount);
1716 
1717  WeightedLeaf findMULbyConst();
1718 
1719  LeafPrioQueue(unsigned Opcode) :
1720  HaveConst(false), Opcode(Opcode) { }
1721 };
1722 } // end anonymous namespace
1723 
1724 WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1725  int ResultPos;
1726  WeightedLeaf Result;
1727 
1728  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1729  const WeightedLeaf &L = Q[Pos];
1730  const SDValue &Val = L.Value;
1731  if (Val.getOpcode() != ISD::SHL ||
1732  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1733  Val.getConstantOperandVal(1) > MaxAmount)
1734  continue;
1735  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1736  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1737  {
1738  Result = L;
1739  ResultPos = Pos;
1740  }
1741  }
1742 
1743  if (Result.Value.getNode()) {
1744  Q.erase(&Q[ResultPos]);
1745  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1746  }
1747 
1748  return Result;
1749 }
1750 
1751 WeightedLeaf LeafPrioQueue::findMULbyConst() {
1752  int ResultPos;
1753  WeightedLeaf Result;
1754 
1755  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1756  const WeightedLeaf &L = Q[Pos];
1757  const SDValue &Val = L.Value;
1758  if (Val.getOpcode() != ISD::MUL ||
1759  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1760  Val.getConstantOperandVal(1) > 127)
1761  continue;
1762  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1763  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1764  {
1765  Result = L;
1766  ResultPos = Pos;
1767  }
1768  }
1769 
1770  if (Result.Value.getNode()) {
1771  Q.erase(&Q[ResultPos]);
1772  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1773  }
1774 
1775  return Result;
1776 }
1777 
1778 SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1779  uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1780  return CurDAG->getConstant(MulFactor, SDLoc(N),
1781  N->getOperand(1).getValueType());
1782 }
1783 
1784 /// @returns the value x for which 2^x is a factor of Val
1785 static unsigned getPowerOf2Factor(SDValue Val) {
1786  if (Val.getOpcode() == ISD::MUL) {
1787  unsigned MaxFactor = 0;
1788  for (int i = 0; i < 2; ++i) {
1790  if (!C)
1791  continue;
1792  const APInt &CInt = C->getAPIntValue();
1793  if (CInt.getBoolValue())
1794  MaxFactor = CInt.countTrailingZeros();
1795  }
1796  return MaxFactor;
1797  }
1798  if (Val.getOpcode() == ISD::SHL) {
1799  if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1800  return 0;
1801  return (unsigned) Val.getConstantOperandVal(1);
1802  }
1803 
1804  return 0;
1805 }
1806 
1807 /// @returns true if V>>Amount will eliminate V's operation on its child
1808 static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1809  if (V.getOpcode() == ISD::MUL) {
1810  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1811  for (int i = 0; i < 2; ++i)
1812  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1813  V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1814  uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1815  return (NewConst == 1);
1816  }
1817  } else if (V.getOpcode() == ISD::SHL) {
1818  return (Amount == V.getConstantOperandVal(1));
1819  }
1820 
1821  return false;
1822 }
1823 
1824 SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1825  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1826  if (V.getOpcode() == ISD::MUL) {
1827  for (int i=0; i < 2; ++i) {
1828  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1829  V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
1830  uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
1831  if (NewConst == 1)
1832  return Ops[!i];
1833  Ops[i] = CurDAG->getConstant(NewConst,
1834  SDLoc(V), V.getValueType());
1835  break;
1836  }
1837  }
1838  } else if (V.getOpcode() == ISD::SHL) {
1839  uint64_t ShiftAmount = V.getConstantOperandVal(1);
1840  if (ShiftAmount == Power)
1841  return Ops[0];
1842  Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
1843  SDLoc(V), V.getValueType());
1844  }
1845 
1846  return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
1847 }
1848 
1849 static bool isTargetConstant(const SDValue &V) {
1850  return V.getOpcode() == HexagonISD::CONST32 ||
1852 }
1853 
1854 unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
1855  if (GAUsesInFunction.count(V))
1856  return GAUsesInFunction[V];
1857 
1858  unsigned Result = 0;
1859  const Function &CurF = CurDAG->getMachineFunction().getFunction();
1860  for (const User *U : V->users()) {
1861  if (isa<Instruction>(U) &&
1862  cast<Instruction>(U)->getParent()->getParent() == &CurF)
1863  ++Result;
1864  }
1865 
1866  GAUsesInFunction[V] = Result;
1867 
1868  return Result;
1869 }
1870 
1871 /// Note - After calling this, N may be dead. It may have been replaced by a
1872 /// new node, so always use the returned value in place of N.
1873 ///
1874 /// @returns The SDValue taking the place of N (which could be N if it is
1875 /// unchanged)
1876 SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
1877  assert(RootWeights.count(N) && "Cannot balance non-root node.");
1878  assert(RootWeights[N] != -2 && "This node was RAUW'd!");
1879  assert(!TopLevel || N->getOpcode() == ISD::ADD);
1880 
1881  // Return early if this node was already visited
1882  if (RootWeights[N] != -1)
1883  return SDValue(N, 0);
1884 
1885  assert(isOpcodeHandled(N));
1886 
1887  SDValue Op0 = N->getOperand(0);
1888  SDValue Op1 = N->getOperand(1);
1889 
1890  // Return early if the operands will remain unchanged or are all roots
1891  if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
1892  (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
1893  SDNode *Op0N = Op0.getNode();
1894  int Weight;
1895  if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
1896  Weight = getWeight(balanceSubTree(Op0N).getNode());
1897  // Weight = calculateWeight(Op0N);
1898  } else
1899  Weight = getWeight(Op0N);
1900 
1901  SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
1902  if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
1903  Weight += getWeight(balanceSubTree(Op1N).getNode());
1904  // Weight += calculateWeight(Op1N);
1905  } else
1906  Weight += getWeight(Op1N);
1907 
1908  RootWeights[N] = Weight;
1909  RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
1910  getHeight(N->getOperand(1).getNode())) + 1;
1911 
1912  DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
1913  << " Height=" << RootHeights[N] << "): ");
1914  DEBUG(N->dump(CurDAG));
1915 
1916  return SDValue(N, 0);
1917  }
1918 
1919  DEBUG(dbgs() << "** Balancing root node: ");
1920  DEBUG(N->dump(CurDAG));
1921 
1922  unsigned NOpcode = N->getOpcode();
1923 
1924  LeafPrioQueue Leaves(NOpcode);
1925  SmallVector<SDValue, 4> Worklist;
1926  Worklist.push_back(SDValue(N, 0));
1927 
1928  // SHL nodes will be converted to MUL nodes
1929  if (NOpcode == ISD::SHL)
1930  NOpcode = ISD::MUL;
1931 
1932  bool CanFactorize = false;
1933  WeightedLeaf Mul1, Mul2;
1934  unsigned MaxPowerOf2 = 0;
1935  WeightedLeaf GA;
1936 
1937  // Do not try to factor out a shift if there is already a shift at the tip of
1938  // the tree.
1939  bool HaveTopLevelShift = false;
1940  if (TopLevel &&
1941  ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
1942  Op0.getConstantOperandVal(1) < 4) ||
1943  (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
1944  Op1.getConstantOperandVal(1) < 4)))
1945  HaveTopLevelShift = true;
1946 
1947  // Flatten the subtree into an ordered list of leaves; at the same time
1948  // determine whether the tree is already balanced.
1949  int InsertionOrder = 0;
1950  SmallDenseMap<SDValue, int> NodeHeights;
1951  bool Imbalanced = false;
1952  int CurrentWeight = 0;
1953  while (!Worklist.empty()) {
1954  SDValue Child = Worklist.pop_back_val();
1955 
1956  if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
1957  // CASE 1: Child is a root note
1958 
1959  int Weight = RootWeights[Child.getNode()];
1960  if (Weight == -1) {
1961  Child = balanceSubTree(Child.getNode());
1962  // calculateWeight(Child.getNode());
1963  Weight = getWeight(Child.getNode());
1964  } else if (Weight == -2) {
1965  // Whoops, this node was RAUWd by one of the balanceSubTree calls we
1966  // made. Our worklist isn't up to date anymore.
1967  // Restart the whole process.
1968  DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
1969  return balanceSubTree(N, TopLevel);
1970  }
1971 
1972  NodeHeights[Child] = 1;
1973  CurrentWeight += Weight;
1974 
1975  unsigned PowerOf2;
1976  if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
1977  (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
1978  Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
1979  // Try to identify two factorizable MUL/SHL children greedily. Leave
1980  // them out of the priority queue for now so we can deal with them
1981  // after.
1982  if (!Mul1.Value.getNode()) {
1983  Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
1984  MaxPowerOf2 = PowerOf2;
1985  } else {
1986  Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
1987  MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
1988 
1989  // Our addressing modes can only shift by a maximum of 3
1990  if (MaxPowerOf2 > 3)
1991  MaxPowerOf2 = 3;
1992 
1993  CanFactorize = true;
1994  }
1995  } else
1996  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
1997  } else if (!isOpcodeHandled(Child.getNode())) {
1998  // CASE 2: Child is an unhandled kind of node (e.g. constant)
1999  int Weight = getWeight(Child.getNode());
2000 
2001  NodeHeights[Child] = getHeight(Child.getNode());
2002  CurrentWeight += Weight;
2003 
2004  if (isTargetConstant(Child) && !GA.Value.getNode())
2005  GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2006  else
2007  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2008  } else {
2009  // CASE 3: Child is a subtree of same opcode
2010  // Visit children first, then flatten.
2011  unsigned ChildOpcode = Child.getOpcode();
2012  assert(ChildOpcode == NOpcode ||
2013  (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2014 
2015  // Convert SHL to MUL
2016  SDValue Op1;
2017  if (ChildOpcode == ISD::SHL)
2018  Op1 = getMultiplierForSHL(Child.getNode());
2019  else
2020  Op1 = Child->getOperand(1);
2021 
2022  if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2023  assert(!NodeHeights.count(Child) && "Parent visited before children?");
2024  // Visit children first, then re-visit this node
2025  Worklist.push_back(Child);
2026  Worklist.push_back(Op1);
2027  Worklist.push_back(Child->getOperand(0));
2028  } else {
2029  // Back at this node after visiting the children
2030  if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2031  Imbalanced = true;
2032 
2033  NodeHeights[Child] = std::max(NodeHeights[Op1],
2034  NodeHeights[Child->getOperand(0)]) + 1;
2035  }
2036  }
2037  }
2038 
2039  DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2040  << " weight=" << CurrentWeight << " imbalanced="
2041  << Imbalanced << "\n");
2042 
2043  // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2044  // This factors out a shift in order to match memw(a<<Y+b).
2045  if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2046  willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2047  DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2048  int Weight = Mul1.Weight + Mul2.Weight;
2049  int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2050  SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2051  SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2052  SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2053  Mul1Factored, Mul2Factored);
2054  SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2055  Mul1.Value.getValueType());
2056  SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2057  Sum, Const);
2058  NodeHeights[New] = Height;
2059  Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2060  } else if (Mul1.Value.getNode()) {
2061  // We failed to factorize two MULs, so now the Muls are left outside the
2062  // queue... add them back.
2063  Leaves.push(Mul1);
2064  if (Mul2.Value.getNode())
2065  Leaves.push(Mul2);
2066  CanFactorize = false;
2067  }
2068 
2069  // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2070  // and the root node itself is not used more than twice. This reduces the
2071  // amount of additional constant extenders introduced by this optimization.
2072  bool CombinedGA = false;
2073  if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2074  GA.Value.hasOneUse() && N->use_size() < 3) {
2075  GlobalAddressSDNode *GANode =
2076  cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2077  ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2078 
2079  if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2081  DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue()
2082  << "): ");
2083  DEBUG(GANode->dump(CurDAG));
2084 
2085  SDValue NewTGA =
2086  CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2087  GANode->getValueType(0),
2088  GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2089  GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2090  GA.Value.getValueType(), NewTGA);
2091  GA.Weight += Leaves.top().Weight;
2092 
2093  NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2094  CombinedGA = true;
2095 
2096  Leaves.pop(); // Remove the offset constant from the queue
2097  }
2098  }
2099 
2100  if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2101  (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2102  RootWeights[N] = CurrentWeight;
2103  RootHeights[N] = NodeHeights[SDValue(N, 0)];
2104 
2105  return SDValue(N, 0);
2106  }
2107 
2108  // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2109  if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2110  WeightedLeaf SHL = Leaves.findSHL(31);
2111  if (SHL.Value.getNode()) {
2112  int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2113  GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2114  GA.Value.getValueType(),
2115  GA.Value, SHL.Value);
2116  GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2117  NodeHeights[GA.Value] = Height;
2118  }
2119  }
2120 
2121  if (GA.Value.getNode())
2122  Leaves.push(GA);
2123 
2124  // If this is the top level and we haven't factored out a shift, we should try
2125  // to move a constant to the bottom to match addressing modes like memw(rX+C)
2126  if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2127  DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2128  Leaves.pushToBottom(Leaves.pop());
2129  }
2130 
2131  const DataLayout &DL = CurDAG->getDataLayout();
2132  const TargetLowering &TLI = *getTargetLowering();
2133 
2134  // Rebuild the tree using Huffman's algorithm
2135  while (Leaves.size() > 1) {
2136  WeightedLeaf L0 = Leaves.pop();
2137 
2138  // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2139  // otherwise just get the next leaf
2140  WeightedLeaf L1 = Leaves.findMULbyConst();
2141  if (!L1.Value.getNode())
2142  L1 = Leaves.pop();
2143 
2144  assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2145 
2146  SDValue V0 = L0.Value;
2147  int V0Weight = L0.Weight;
2148  SDValue V1 = L1.Value;
2149  int V1Weight = L1.Weight;
2150 
2151  // Make sure that none of these nodes have been RAUW'd
2152  if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2153  (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2154  DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2155  return balanceSubTree(N, TopLevel);
2156  }
2157 
2160  EVT VT = N->getValueType(0);
2161  SDValue NewNode;
2162 
2163  if (V0C && !V1C) {
2164  std::swap(V0, V1);
2165  std::swap(V0C, V1C);
2166  }
2167 
2168  // Calculate height of this node
2169  assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2170  "Children must have been visited before re-combining them!");
2171  int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2172 
2173  // Rebuild this node (and restore SHL from MUL if needed)
2174  if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2175  NewNode = CurDAG->getNode(
2176  ISD::SHL, SDLoc(V0), VT, V0,
2178  V1C->getAPIntValue().logBase2(), SDLoc(N),
2179  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2180  else
2181  NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2182 
2183  NodeHeights[NewNode] = Height;
2184 
2185  int Weight = V0Weight + V1Weight;
2186  Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2187 
2188  DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height="
2189  << Height << "):\n");
2190  DEBUG(NewNode.dump());
2191  }
2192 
2193  assert(Leaves.size() == 1);
2194  SDValue NewRoot = Leaves.top().Value;
2195 
2196  assert(NodeHeights.count(NewRoot));
2197  int Height = NodeHeights[NewRoot];
2198 
2199  // Restore SHL if we earlier converted it to a MUL
2200  if (NewRoot.getOpcode() == ISD::MUL) {
2201  ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2202  if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2203  EVT VT = NewRoot.getValueType();
2204  SDValue V0 = NewRoot.getOperand(0);
2205  NewRoot = CurDAG->getNode(
2206  ISD::SHL, SDLoc(NewRoot), VT, V0,
2208  V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2209  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2210  }
2211  }
2212 
2213  if (N != NewRoot.getNode()) {
2214  DEBUG(dbgs() << "--> Root is now: ");
2215  DEBUG(NewRoot.dump());
2216 
2217  // Replace all uses of old root by new root
2218  CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2219  // Mark that we have RAUW'd N
2220  RootWeights[N] = -2;
2221  } else {
2222  DEBUG(dbgs() << "--> Root unchanged.\n");
2223  }
2224 
2225  RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2226  RootHeights[NewRoot.getNode()] = Height;
2227 
2228  return NewRoot;
2229 }
2230 
2231 void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2232  for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) {
2233  SDNode *N = &*I++;
2234  if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2235  continue;
2236 
2237  SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2238  if (BasePtr.getOpcode() != ISD::ADD)
2239  continue;
2240 
2241  // We've already processed this node
2242  if (RootWeights.count(BasePtr.getNode()))
2243  continue;
2244 
2245  DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2246  DEBUG(N->dump(CurDAG));
2247 
2248  // FindRoots
2249  SmallVector<SDNode *, 4> Worklist;
2250 
2251  Worklist.push_back(BasePtr.getOperand(0).getNode());
2252  Worklist.push_back(BasePtr.getOperand(1).getNode());
2253 
2254  while (!Worklist.empty()) {
2255  SDNode *N = Worklist.pop_back_val();
2256  unsigned Opcode = N->getOpcode();
2257 
2258  if (!isOpcodeHandled(N))
2259  continue;
2260 
2261  Worklist.push_back(N->getOperand(0).getNode());
2262  Worklist.push_back(N->getOperand(1).getNode());
2263 
2264  // Not a root if it has only one use and same opcode as its parent
2265  if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
2266  continue;
2267 
2268  // This root node has already been processed
2269  if (RootWeights.count(N))
2270  continue;
2271 
2272  RootWeights[N] = -1;
2273  }
2274 
2275  // Balance node itself
2276  RootWeights[BasePtr.getNode()] = -1;
2277  SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2278 
2279  if (N->getOpcode() == ISD::LOAD)
2280  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2281  NewBasePtr, N->getOperand(2));
2282  else
2283  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2284  NewBasePtr, N->getOperand(3));
2285 
2286  DEBUG(dbgs() << "--> Final node: ");
2287  DEBUG(N->dump(CurDAG));
2288  }
2289 
2291  GAUsesInFunction.clear();
2292  RootHeights.clear();
2293  RootWeights.clear();
2294 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:546
static MVT getIntegerVT(unsigned BitWidth)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOffset() const
const GlobalValue * getGlobal() const
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static cl::opt< bool > CheckSingleUse("hexagon-isel-su", cl::Hidden, cl::init(true), cl::desc("Enable checking of SDNode's single-use status"))
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
static bool isMemOPCandidate(SDNode *I, SDNode *U)
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineSDNodes&#39;s memory reference descriptor list.
static bool willShiftRightEliminate(SDValue V, unsigned Amount)
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:343
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:136
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
const SDValue & getBasePtr() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool DetectUseSxtw(SDValue &N, SDValue &R)
const SDValue & getValue() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
bool SelectAddrFI(SDValue &N, SDValue &R)
const SDValue & getChain() const
static unsigned getPowerOf2Factor(SDValue Val)
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
unsigned getAlignment() const
Hexagon target-specific information for each MachineFunction.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:298
A debug info location.
Definition: DebugLoc.h:34
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:677
const HexagonFrameLowering * getFrameLowering() const override
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:181
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
MachineFunction * MF
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1488
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
bool isTruncatingStore() const
Return true if the op does a truncation before store.
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1611
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
static ManagedStatic< DebugCounter > DC
static cl::opt< bool > RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if this allows optimizations"))
Shift and rotation operations.
Definition: ISDOpcodes.h:380
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:470
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
const TargetLowering * TLI
static bool isTargetConstant(const SDValue &V)
uint64_t getConstantOperandVal(unsigned i) const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl)
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:447
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:388
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT&#39;s, which are used to parameterize some operations.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:436
unsigned getSizeInBits() const
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:471
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:385
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:613
#define T
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:561
bool SelectNewCircIntrinsic(SDNode *IntN)
Generate a machine instruction node for the new circlar buffer intrinsics.
static cl::opt< bool > EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), cl::desc("Rebalance address calculation trees to improve " "instruction selection"))
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
bool SelectBrevLdIntrinsic(SDNode *IntN)
SDNode * StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN)
This class is used to represent ISD::STORE nodes.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:406
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:112
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:421
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool SelectAnyImm0(SDValue &N, SDValue &R)
bool SelectAnyInt(SDValue &N, SDValue &R)
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:893
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions...
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
const SDValue & getOffset() const
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool isValidAutoIncImm(const EVT VT, const int Offset) const
static const unsigned End
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const APInt & getAPIntValue() const
bool SelectAnyImm2(SDValue &N, SDValue &R)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Extended Value Type.
Definition: ValueTypes.h:34
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl)
const MachineBasicBlock & front() const
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isOpcodeHandled(const SDNode *N)
void dump() const
Dump this node, for debugging.
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
bool SelectAddrGA(SDValue &N, SDValue &R)
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:432
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:212
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, uint32_t LogAlign)
unsigned estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
void dump() const
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:725
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const
EVT is not used in-tree, but is used by out-of-tree target.
An SDNode that represents everything that will be needed to construct a MachineInstr.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:383
CHAIN = SC CHAIN, Imm128 - System call.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Represents one node in the SelectionDAG.
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:424
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool SelectAnyImmediate(SDValue &N, SDValue &R, uint32_t LogAlign)
unsigned logBase2() const
Definition: APInt.h:1727
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
size_t use_size() const
Return the number of uses of this node.
EVT getMemoryVT() const
Return the type of the in-memory value.
Class for arbitrary precision integers.
Definition: APInt.h:69
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:390
bool isPowerOf2() const
Check if this APInt&#39;s value is a power of two greater than zero.
Definition: APInt.h:457
iterator_range< user_iterator > users()
Definition: Value.h:405
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:446
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:449
static cl::opt< bool > RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"))
bool SelectAnyImm3(SDValue &N, SDValue &R)
bool SelectAnyImm1(SDValue &N, SDValue &R)
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:425
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
const SDValue & getValue() const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:363
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:690
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:61
bool SelectAddrGP(SDValue &N, SDValue &R)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:464
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:582
void ReplaceAllUsesWith(SDValue From, SDValue Op)
Modify anything using &#39;From&#39; to use &#39;To&#39; instead.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool tryLoadOfLoadIntrinsic(LoadSDNode *N)
FunctionPass * createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel)
unsigned getOpcode() const
unsigned CreateReg(MVT VT)
CreateReg - Allocate a single virtual register for the given type.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:141
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
const TargetLowering * getTargetLowering() const
bool SelectAnyImm(SDValue &N, SDValue &R)
This class is used to form a handle around another node that is persistent and is updated across invo...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:73
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
static const Function * getParent(const Value *V)
bool isNonTemporal() const
#define DEBUG(X)
Definition: Debug.h:118
const APFloat & getValueAPF() const
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
APInt bitcastToAPInt() const
Definition: APFloat.h:1094
Conversion operators.
Definition: ISDOpcodes.h:443
const SDValue & getOperand(unsigned i) const
uint64_t getZExtValue() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
MachineSDNode * LoadInstrForLoadIntrinsic(SDNode *IntN)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
FunctionLoweringInfo * FuncInfo
#define T1
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:607
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:873
This class is used to represent ISD::LOAD nodes.