LLVM  8.0.0svn
HexagonISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the Hexagon target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "Hexagon.h"
15 #include "HexagonISelDAGToDAG.h"
16 #include "HexagonISelLowering.h"
18 #include "HexagonTargetMachine.h"
22 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/Support/Debug.h"
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 
29 static
31 EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
32  cl::desc("Rebalance address calculation trees to improve "
33  "instruction selection"));
34 
35 // Rebalance only if this allows e.g. combining a GA with an offset or
36 // factoring out a shift.
37 static
39 RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
40  cl::desc("Rebalance address tree only if this allows optimizations"));
41 
42 static
44 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
45  cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
46 
47 static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
48  cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
49 
50 //===----------------------------------------------------------------------===//
51 // Instruction Selector Implementation
52 //===----------------------------------------------------------------------===//
53 
54 #define GET_DAGISEL_BODY HexagonDAGToDAGISel
55 #include "HexagonGenDAGISel.inc"
56 
57 /// createHexagonISelDag - This pass converts a legalized DAG into a
58 /// Hexagon-specific DAG, ready for instruction scheduling.
59 ///
60 namespace llvm {
62  CodeGenOpt::Level OptLevel) {
63  return new HexagonDAGToDAGISel(TM, OptLevel);
64 }
65 }
66 
68  SDValue Chain = LD->getChain();
69  SDValue Base = LD->getBasePtr();
70  SDValue Offset = LD->getOffset();
71  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
72  EVT LoadedVT = LD->getMemoryVT();
73  unsigned Opcode = 0;
74 
75  // Check for zero extended loads. Treat any-extend loads as zero extended
76  // loads.
78  bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
79  bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
80 
81  assert(LoadedVT.isSimple());
82  switch (LoadedVT.getSimpleVT().SimpleTy) {
83  case MVT::i8:
84  if (IsZeroExt)
85  Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
86  else
87  Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
88  break;
89  case MVT::i16:
90  if (IsZeroExt)
91  Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
92  else
93  Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
94  break;
95  case MVT::i32:
96  case MVT::f32:
97  case MVT::v2i16:
98  case MVT::v4i8:
99  Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
100  break;
101  case MVT::i64:
102  case MVT::f64:
103  case MVT::v2i32:
104  case MVT::v4i16:
105  case MVT::v8i8:
106  Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
107  break;
108  case MVT::v64i8:
109  case MVT::v32i16:
110  case MVT::v16i32:
111  case MVT::v8i64:
112  case MVT::v128i8:
113  case MVT::v64i16:
114  case MVT::v32i32:
115  case MVT::v16i64:
116  if (isAlignedMemNode(LD)) {
117  if (LD->isNonTemporal())
118  Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
119  else
120  Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
121  } else {
122  Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
123  }
124  break;
125  default:
126  llvm_unreachable("Unexpected memory type in indexed load");
127  }
128 
129  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
130  MachineMemOperand *MemOp = LD->getMemOperand();
131 
132  auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
133  -> MachineSDNode* {
134  if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
135  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
136  return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
137  Zero, SDValue(N, 0));
138  }
139  if (ExtType == ISD::SEXTLOAD)
140  return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
141  SDValue(N, 0));
142  return N;
143  };
144 
145  // Loaded value Next address Chain
146  SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
147  SDValue To[3];
148 
149  EVT ValueVT = LD->getValueType(0);
150  if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
151  // A load extending to i64 will actually produce i32, which will then
152  // need to be extended to i64.
153  assert(LoadedVT.getSizeInBits() <= 32);
154  ValueVT = MVT::i32;
155  }
156 
157  if (IsValidInc) {
158  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
159  MVT::i32, MVT::Other, Base,
160  IncV, Chain);
161  CurDAG->setNodeMemRefs(L, {MemOp});
162  To[1] = SDValue(L, 1); // Next address.
163  To[2] = SDValue(L, 2); // Chain.
164  // Handle special case for extension to i64.
165  if (LD->getValueType(0) == MVT::i64)
166  L = getExt64(L, dl);
167  To[0] = SDValue(L, 0); // Loaded (extended) value.
168  } else {
169  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
170  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
171  Base, Zero, Chain);
172  CurDAG->setNodeMemRefs(L, {MemOp});
173  To[2] = SDValue(L, 1); // Chain.
174  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
175  Base, IncV);
176  To[1] = SDValue(A, 0); // Next address.
177  // Handle special case for extension to i64.
178  if (LD->getValueType(0) == MVT::i64)
179  L = getExt64(L, dl);
180  To[0] = SDValue(L, 0); // Loaded (extended) value.
181  }
182  ReplaceUses(From, To, 3);
183  CurDAG->RemoveDeadNode(LD);
184 }
185 
187  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
188  return nullptr;
189 
190  SDLoc dl(IntN);
191  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
192 
193  static std::map<unsigned,unsigned> LoadPciMap = {
194  { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci },
195  { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
196  { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci },
197  { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
198  { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci },
199  { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci },
200  };
201  auto FLC = LoadPciMap.find(IntNo);
202  if (FLC != LoadPciMap.end()) {
203  EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
204  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
205  // Operands: { Base, Increment, Modifier, Chain }
206  auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
207  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
208  MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
209  { IntN->getOperand(2), I, IntN->getOperand(4),
210  IntN->getOperand(0) });
211  return Res;
212  }
213 
214  return nullptr;
215 }
216 
218  SDNode *IntN) {
219  // The "LoadN" is just a machine load instruction. The intrinsic also
220  // involves storing it. Generate an appropriate store to the location
221  // given in the intrinsic's operand(3).
222  uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
223  unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
225  unsigned Size = 1U << (SizeBits-1);
226 
227  SDLoc dl(IntN);
229  SDValue TS;
230  SDValue Loc = IntN->getOperand(3);
231 
232  if (Size >= 4)
233  TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
234  Size);
235  else
236  TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
237  PI, MVT::getIntegerVT(Size * 8), Size);
238 
239  SDNode *StoreN;
240  {
241  HandleSDNode Handle(TS);
242  SelectStore(TS.getNode());
243  StoreN = Handle.getValue().getNode();
244  }
245 
246  // Load's results are { Loaded value, Updated pointer, Chain }
247  ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
248  ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
249  return StoreN;
250 }
251 
253  // The intrinsics for load circ/brev perform two operations:
254  // 1. Load a value V from the specified location, using the addressing
255  // mode corresponding to the intrinsic.
256  // 2. Store V into a specified location. This location is typically a
257  // local, temporary object.
258  // In many cases, the program using these intrinsics will immediately
259  // load V again from the local object. In those cases, when certain
260  // conditions are met, the last load can be removed.
261  // This function identifies and optimizes this pattern. If the pattern
262  // cannot be optimized, it returns nullptr, which will cause the load
263  // to be selected separately from the intrinsic (which will be handled
264  // in SelectIntrinsicWChain).
265 
266  SDValue Ch = N->getOperand(0);
267  SDValue Loc = N->getOperand(1);
268 
269  // Assume that the load and the intrinsic are connected directly with a
270  // chain:
271  // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C
272  // t2: i32,ch = load t1:1, Loc, ...
273  SDNode *C = Ch.getNode();
274 
275  if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
276  return false;
277 
278  // The second load can only be eliminated if its extension type matches
279  // that of the load instruction corresponding to the intrinsic. The user
280  // can provide an address of an unsigned variable to store the result of
281  // a sign-extending intrinsic into (or the other way around).
282  ISD::LoadExtType IntExt;
283  switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
284  case Intrinsic::hexagon_circ_ldub:
285  case Intrinsic::hexagon_circ_lduh:
286  IntExt = ISD::ZEXTLOAD;
287  break;
288  case Intrinsic::hexagon_circ_ldw:
289  case Intrinsic::hexagon_circ_ldd:
290  IntExt = ISD::NON_EXTLOAD;
291  break;
292  default:
293  IntExt = ISD::SEXTLOAD;
294  break;
295  }
296  if (N->getExtensionType() != IntExt)
297  return false;
298 
299  // Make sure the target location for the loaded value in the load intrinsic
300  // is the location from which LD (or N) is loading.
301  if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
302  return false;
303 
306  SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
307  SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
308  ReplaceUses(F, T, array_lengthof(T));
309  // This transformation will leave the intrinsic dead. If it remains in
310  // the DAG, the selection code will see it again, but without the load,
311  // and it will generate a store that is normally required for it.
313  return true;
314  }
315  return false;
316 }
317 
318 // Convert the bit-reverse load intrinsic to appropriate target instruction.
320  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
321  return false;
322 
323  const SDLoc &dl(IntN);
324  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
325 
326  static const std::map<unsigned, unsigned> LoadBrevMap = {
327  { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
328  { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
329  { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
330  { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
331  { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
332  { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
333  };
334  auto FLI = LoadBrevMap.find(IntNo);
335  if (FLI != LoadBrevMap.end()) {
336  EVT ValTy =
337  (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
338  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
339  // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
340  // modifier}.
341  // Operands of target instruction: { Base, Modifier, Chain }.
343  FLI->second, dl, RTys,
344  {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
345 
346  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
347  CurDAG->setNodeMemRefs(Res, {MemOp});
348 
349  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
350  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
351  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
352  CurDAG->RemoveDeadNode(IntN);
353  return true;
354  }
355  return false;
356 }
357 
358 /// Generate a machine instruction node for the new circlar buffer intrinsics.
359 /// The new versions use a CSx register instead of the K field.
361  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
362  return false;
363 
364  SDLoc DL(IntN);
365  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
367 
368  static std::map<unsigned,unsigned> LoadNPcMap = {
369  { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
370  { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
371  { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
372  { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
373  { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
374  { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
375  { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
376  { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
377  { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
378  { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
379  { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
380  { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
381  };
382  auto FLI = LoadNPcMap.find (IntNo);
383  if (FLI != LoadNPcMap.end()) {
384  EVT ValTy = MVT::i32;
385  if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
386  IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
387  ValTy = MVT::i64;
388  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
389  // Handle load.*_pci case which has 6 operands.
390  if (IntN->getNumOperands() == 6) {
391  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
392  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
393  // Operands: { Base, Increment, Modifier, Start, Chain }.
394  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
395  IntN->getOperand(0) };
396  } else
397  // Handle load.*_pcr case which has 5 operands.
398  // Operands: { Base, Modifier, Start, Chain }.
399  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
400  IntN->getOperand(0) };
401  MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
402  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
403  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
404  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
405  CurDAG->RemoveDeadNode(IntN);
406  return true;
407  }
408 
409  static std::map<unsigned,unsigned> StoreNPcMap = {
410  { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
411  { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
412  { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
413  { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
414  { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
415  { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
416  { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
417  { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
418  { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
419  { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
420  };
421  auto FSI = StoreNPcMap.find (IntNo);
422  if (FSI != StoreNPcMap.end()) {
423  EVT RTys[] = { MVT::i32, MVT::Other };
424  // Handle store.*_pci case which has 7 operands.
425  if (IntN->getNumOperands() == 7) {
426  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
427  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
428  // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
429  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
430  IntN->getOperand(6), IntN->getOperand(0) };
431  } else
432  // Handle store.*_pcr case which has 6 operands.
433  // Operands: { Base, Modifier, Value, Start, Chain }.
434  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
435  IntN->getOperand(5), IntN->getOperand(0) };
436  MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
437  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
438  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
439  CurDAG->RemoveDeadNode(IntN);
440  return true;
441  }
442 
443  return false;
444 }
445 
447  SDLoc dl(N);
448  LoadSDNode *LD = cast<LoadSDNode>(N);
449 
450  // Handle indexed loads.
452  if (AM != ISD::UNINDEXED) {
453  SelectIndexedLoad(LD, dl);
454  return;
455  }
456 
457  // Handle patterns using circ/brev load intrinsics.
458  if (tryLoadOfLoadIntrinsic(LD))
459  return;
460 
461  SelectCode(LD);
462 }
463 
465  SDValue Chain = ST->getChain();
466  SDValue Base = ST->getBasePtr();
467  SDValue Offset = ST->getOffset();
468  SDValue Value = ST->getValue();
469  // Get the constant value.
470  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
471  EVT StoredVT = ST->getMemoryVT();
472  EVT ValueVT = Value.getValueType();
473 
474  bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
475  unsigned Opcode = 0;
476 
477  assert(StoredVT.isSimple());
478  switch (StoredVT.getSimpleVT().SimpleTy) {
479  case MVT::i8:
480  Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
481  break;
482  case MVT::i16:
483  Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
484  break;
485  case MVT::i32:
486  case MVT::f32:
487  case MVT::v2i16:
488  case MVT::v4i8:
489  Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
490  break;
491  case MVT::i64:
492  case MVT::f64:
493  case MVT::v2i32:
494  case MVT::v4i16:
495  case MVT::v8i8:
496  Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
497  break;
498  case MVT::v64i8:
499  case MVT::v32i16:
500  case MVT::v16i32:
501  case MVT::v8i64:
502  case MVT::v128i8:
503  case MVT::v64i16:
504  case MVT::v32i32:
505  case MVT::v16i64:
506  if (isAlignedMemNode(ST)) {
507  if (ST->isNonTemporal())
508  Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
509  else
510  Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
511  } else {
512  Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
513  }
514  break;
515  default:
516  llvm_unreachable("Unexpected memory type in indexed store");
517  }
518 
519  if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
520  assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
521  Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
522  dl, MVT::i32, Value);
523  }
524 
525  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
526  MachineMemOperand *MemOp = ST->getMemOperand();
527 
528  // Next address Chain
529  SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
530  SDValue To[2];
531 
532  if (IsValidInc) {
533  // Build post increment store.
534  SDValue Ops[] = { Base, IncV, Value, Chain };
536  Ops);
537  CurDAG->setNodeMemRefs(S, {MemOp});
538  To[0] = SDValue(S, 0);
539  To[1] = SDValue(S, 1);
540  } else {
541  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
542  SDValue Ops[] = { Base, Zero, Value, Chain };
543  MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
544  CurDAG->setNodeMemRefs(S, {MemOp});
545  To[1] = SDValue(S, 0);
546  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
547  Base, IncV);
548  To[0] = SDValue(A, 0);
549  }
550 
551  ReplaceUses(From, To, 2);
552  CurDAG->RemoveDeadNode(ST);
553 }
554 
556  SDLoc dl(N);
557  StoreSDNode *ST = cast<StoreSDNode>(N);
558 
559  // Handle indexed stores.
561  if (AM != ISD::UNINDEXED) {
562  SelectIndexedStore(ST, dl);
563  return;
564  }
565 
566  SelectCode(ST);
567 }
568 
570  SDLoc dl(N);
571  SDValue Shl_0 = N->getOperand(0);
572  SDValue Shl_1 = N->getOperand(1);
573 
574  auto Default = [this,N] () -> void { SelectCode(N); };
575 
576  if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
577  return Default();
578 
579  // RHS is const.
580  int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
581 
582  if (Shl_0.getOpcode() == ISD::MUL) {
583  SDValue Mul_0 = Shl_0.getOperand(0); // Val
584  SDValue Mul_1 = Shl_0.getOperand(1); // Const
585  // RHS of mul is const.
586  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
587  int32_t ValConst = C->getSExtValue() << ShlConst;
588  if (isInt<9>(ValConst)) {
589  SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
590  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
591  MVT::i32, Mul_0, Val);
592  ReplaceNode(N, Result);
593  return;
594  }
595  }
596  return Default();
597  }
598 
599  if (Shl_0.getOpcode() == ISD::SUB) {
600  SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
601  SDValue Sub_1 = Shl_0.getOperand(1); // Val
602  if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
603  if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
604  return Default();
605  SDValue Shl2_0 = Sub_1.getOperand(0); // Val
606  SDValue Shl2_1 = Sub_1.getOperand(1); // Const
607  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
608  int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
609  if (isInt<9>(-ValConst)) {
610  SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
611  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
612  MVT::i32, Shl2_0, Val);
613  ReplaceNode(N, Result);
614  return;
615  }
616  }
617  }
618  }
619 
620  return Default();
621 }
622 
623 //
624 // Handling intrinsics for circular load and bitreverse load.
625 //
630  return;
631  }
632 
633  // Handle bit-reverse load intrinsics.
634  if (SelectBrevLdIntrinsic(N))
635  return;
636 
637  if (SelectNewCircIntrinsic(N))
638  return;
639 
640  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
641  if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
642  IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
643  IntNo == Intrinsic::hexagon_V6_vgathermh ||
644  IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
645  IntNo == Intrinsic::hexagon_V6_vgathermhw ||
646  IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
647  SelectV65Gather(N);
648  return;
649  }
650  if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
651  IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
652  IntNo == Intrinsic::hexagon_V6_vgathermhq ||
653  IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
654  IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
655  IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
657  return;
658  }
659 
660  SelectCode(N);
661 }
662 
664  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
665  unsigned Bits;
666  switch (IID) {
667  case Intrinsic::hexagon_S2_vsplatrb:
668  Bits = 8;
669  break;
670  case Intrinsic::hexagon_S2_vsplatrh:
671  Bits = 16;
672  break;
673  case Intrinsic::hexagon_V6_vaddcarry:
674  case Intrinsic::hexagon_V6_vaddcarry_128B:
675  case Intrinsic::hexagon_V6_vsubcarry:
676  case Intrinsic::hexagon_V6_vsubcarry_128B:
678  return;
679  default:
680  SelectCode(N);
681  return;
682  }
683 
684  SDValue V = N->getOperand(1);
685  SDValue U;
686  if (keepsLowBits(V, Bits, U)) {
687  SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
688  N->getOperand(0), U);
689  ReplaceNode(N, R.getNode());
690  SelectCode(R.getNode());
691  return;
692  }
693  SelectCode(N);
694 }
695 
696 //
697 // Map floating point constant values.
698 //
700  SDLoc dl(N);
702  APInt A = CN->getValueAPF().bitcastToAPInt();
703  if (N->getValueType(0) == MVT::f32) {
704  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
705  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
706  return;
707  }
708  if (N->getValueType(0) == MVT::f64) {
709  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
710  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
711  return;
712  }
713 
714  SelectCode(N);
715 }
716 
717 //
718 // Map boolean values.
719 //
721  if (N->getValueType(0) == MVT::i1) {
722  assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1));
723  unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
724  ? Hexagon::PS_true
725  : Hexagon::PS_false;
727  return;
728  }
729 
730  SelectCode(N);
731 }
732 
734  MachineFrameInfo &MFI = MF->getFrameInfo();
735  const HexagonFrameLowering *HFI = HST->getFrameLowering();
736  int FX = cast<FrameIndexSDNode>(N)->getIndex();
737  unsigned StkA = HFI->getStackAlignment();
738  unsigned MaxA = MFI.getMaxAlignment();
740  SDLoc DL(N);
741  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
742  SDNode *R = nullptr;
743 
744  // Use PS_fi when:
745  // - the object is fixed, or
746  // - there are no objects with higher-than-default alignment, or
747  // - there are no dynamically allocated objects.
748  // Otherwise, use PS_fia.
749  if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
750  R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
751  } else {
752  auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
753  unsigned AR = HMFI.getStackAlignBaseVReg();
754  SDValue CH = CurDAG->getEntryNode();
755  SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
756  R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
757  }
758 
759  ReplaceNode(N, R);
760 }
761 
763  unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
764  : Hexagon::A4_subp_c;
765  SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
766  { N->getOperand(0), N->getOperand(1),
767  N->getOperand(2) });
768  ReplaceNode(N, C);
769 }
770 
772  MVT ResTy = N->getValueType(0).getSimpleVT();
773  if (HST->isHVXVectorType(ResTy, true))
774  return SelectHvxVAlign(N);
775 
776  const SDLoc &dl(N);
777  unsigned VecLen = ResTy.getSizeInBits();
778  if (VecLen == 32) {
779  SDValue Ops[] = {
780  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
781  N->getOperand(0),
782  CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
783  N->getOperand(1),
784  CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
785  };
786  SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
787  MVT::i64, Ops);
788 
789  // Shift right by "(Addr & 0x3) * 8" bytes.
790  SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
791  SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
792  SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
793  M0, N->getOperand(2), M1);
794  SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
795  SDValue(R, 0), SDValue(C, 0));
796  SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
797  SDValue(S, 0));
798  ReplaceNode(N, E.getNode());
799  } else {
800  assert(VecLen == 64);
801  SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
802  N->getOperand(2));
803  SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
804  N->getOperand(0), N->getOperand(1),
805  SDValue(Pu,0));
806  ReplaceNode(N, VA);
807  }
808 }
809 
811  const SDLoc &dl(N);
812  SDValue A = N->getOperand(1);
813  int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
814  assert(isPowerOf2_32(-Mask));
815 
816  SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
817  SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
818  N->getOperand(0), M);
819  ReplaceNode(N, AA);
820 }
821 
822 // Handle these nodes here to avoid having to write patterns for all
823 // combinations of input/output types. In all cases, the resulting
824 // instruction is the same.
826  SDValue Op = N->getOperand(0);
827  MVT OpTy = Op.getValueType().getSimpleVT();
828  SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
829  CurDAG->getVTList(OpTy), {Op});
830  ReplaceNode(T, Op.getNode());
831 }
832 
834  MVT ResTy = N->getValueType(0).getSimpleVT();
835  SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
836  N->getOperand(0));
837  ReplaceNode(N, T);
838 }
839 
841  const SDLoc &dl(N);
842  MVT ResTy = N->getValueType(0).getSimpleVT();
843  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
844  SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
845  N->getOperand(0), Zero);
846  ReplaceNode(N, T);
847 }
848 
850  const SDLoc &dl(N);
851  MVT ResTy = N->getValueType(0).getSimpleVT();
852 
854  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
855  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
856  N->getOperand(0), SDValue(R,0));
857  ReplaceNode(N, T);
858 }
859 
861  const SDLoc &dl(N);
862  MVT ResTy = N->getValueType(0).getSimpleVT();
863 
865  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
866  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
867  N->getOperand(0), SDValue(R,0));
868  ReplaceNode(N, T);
869 }
870 
872  if (N->isMachineOpcode())
873  return N->setNodeId(-1); // Already selected.
874 
875  switch (N->getOpcode()) {
876  case ISD::Constant: return SelectConstant(N);
877  case ISD::ConstantFP: return SelectConstantFP(N);
878  case ISD::FrameIndex: return SelectFrameIndex(N);
879  case ISD::SHL: return SelectSHL(N);
880  case ISD::LOAD: return SelectLoad(N);
881  case ISD::STORE: return SelectStore(N);
884 
885  case HexagonISD::ADDC:
886  case HexagonISD::SUBC: return SelectAddSubCarry(N);
887  case HexagonISD::VALIGN: return SelectVAlign(N);
889  case HexagonISD::TYPECAST: return SelectTypecast(N);
890  case HexagonISD::P2D: return SelectP2D(N);
891  case HexagonISD::D2P: return SelectD2P(N);
892  case HexagonISD::Q2V: return SelectQ2V(N);
893  case HexagonISD::V2Q: return SelectV2Q(N);
894  }
895 
896  if (HST->useHVXOps()) {
897  switch (N->getOpcode()) {
898  case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N);
899  case HexagonISD::VROR: return SelectHvxRor(N);
900  }
901  }
902 
903  SelectCode(N);
904 }
905 
907 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
908  std::vector<SDValue> &OutOps) {
909  SDValue Inp = Op, Res;
910 
911  switch (ConstraintID) {
912  default:
913  return true;
915  case InlineAsm::Constraint_o: // Offsetable.
916  case InlineAsm::Constraint_v: // Not offsetable.
917  case InlineAsm::Constraint_m: // Memory.
918  if (SelectAddrFI(Inp, Res))
919  OutOps.push_back(Res);
920  else
921  OutOps.push_back(Inp);
922  break;
923  }
924 
925  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
926  return false;
927 }
928 
929 
930 static bool isMemOPCandidate(SDNode *I, SDNode *U) {
931  // I is an operand of U. Check if U is an arithmetic (binary) operation
932  // usable in a memop, where the other operand is a loaded value, and the
933  // result of U is stored in the same location.
934 
935  if (!U->hasOneUse())
936  return false;
937  unsigned Opc = U->getOpcode();
938  switch (Opc) {
939  case ISD::ADD:
940  case ISD::SUB:
941  case ISD::AND:
942  case ISD::OR:
943  break;
944  default:
945  return false;
946  }
947 
948  SDValue S0 = U->getOperand(0);
949  SDValue S1 = U->getOperand(1);
950  SDValue SY = (S0.getNode() == I) ? S1 : S0;
951 
952  SDNode *UUse = *U->use_begin();
953  if (UUse->getNumValues() != 1)
954  return false;
955 
956  // Check if one of the inputs to U is a load instruction and the output
957  // is used by a store instruction. If so and they also have the same
958  // base pointer, then don't preoprocess this node sequence as it
959  // can be matched to a memop.
960  SDNode *SYNode = SY.getNode();
961  if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
962  SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
963  SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
964  if (LDBasePtr == STBasePtr)
965  return true;
966  }
967  return false;
968 }
969 
970 
971 // Transform: (or (select c x 0) z) -> (select c (or x z) z)
972 // (or (select c 0 y) z) -> (select c z (or y z))
973 void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
974  SelectionDAG &DAG = *CurDAG;
975 
976  for (auto I : Nodes) {
977  if (I->getOpcode() != ISD::OR)
978  continue;
979 
980  auto IsZero = [] (const SDValue &V) -> bool {
981  if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
982  return SC->isNullValue();
983  return false;
984  };
985  auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
986  if (Op.getOpcode() != ISD::SELECT)
987  return false;
988  return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
989  };
990 
991  SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
992  EVT VT = I->getValueType(0);
993  bool SelN0 = IsSelect0(N0);
994  SDValue SOp = SelN0 ? N0 : N1;
995  SDValue VOp = SelN0 ? N1 : N0;
996 
997  if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
998  SDValue SC = SOp.getOperand(0);
999  SDValue SX = SOp.getOperand(1);
1000  SDValue SY = SOp.getOperand(2);
1001  SDLoc DLS = SOp;
1002  if (IsZero(SY)) {
1003  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1004  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1005  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1006  } else if (IsZero(SX)) {
1007  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1008  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1009  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1010  }
1011  }
1012  }
1013 }
1014 
1015 // Transform: (store ch val (add x (add (shl y c) e)))
1016 // to: (store ch val (add x (shl (add y d) c))),
1017 // where e = (shl d c) for some integer d.
1018 // The purpose of this is to enable generation of loads/stores with
1019 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1020 // value c must be 0, 1 or 2.
1021 void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1022  SelectionDAG &DAG = *CurDAG;
1023 
1024  for (auto I : Nodes) {
1025  if (I->getOpcode() != ISD::STORE)
1026  continue;
1027 
1028  // I matched: (store ch val Off)
1029  SDValue Off = I->getOperand(2);
1030  // Off needs to match: (add x (add (shl y c) (shl d c))))
1031  if (Off.getOpcode() != ISD::ADD)
1032  continue;
1033  // Off matched: (add x T0)
1034  SDValue T0 = Off.getOperand(1);
1035  // T0 needs to match: (add T1 T2):
1036  if (T0.getOpcode() != ISD::ADD)
1037  continue;
1038  // T0 matched: (add T1 T2)
1039  SDValue T1 = T0.getOperand(0);
1040  SDValue T2 = T0.getOperand(1);
1041  // T1 needs to match: (shl y c)
1042  if (T1.getOpcode() != ISD::SHL)
1043  continue;
1044  SDValue C = T1.getOperand(1);
1046  if (CN == nullptr)
1047  continue;
1048  unsigned CV = CN->getZExtValue();
1049  if (CV > 2)
1050  continue;
1051  // T2 needs to match e, where e = (shl d c) for some d.
1053  if (EN == nullptr)
1054  continue;
1055  unsigned EV = EN->getZExtValue();
1056  if (EV % (1 << CV) != 0)
1057  continue;
1058  unsigned DV = EV / (1 << CV);
1059 
1060  // Replace T0 with: (shl (add y d) c)
1061  SDLoc DL = SDLoc(I);
1062  EVT VT = T0.getValueType();
1063  SDValue D = DAG.getConstant(DV, DL, VT);
1064  // NewAdd = (add y d)
1065  SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1066  // NewShl = (shl NewAdd c)
1067  SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1068  ReplaceNode(T0.getNode(), NewShl.getNode());
1069  }
1070 }
1071 
1072 // Transform: (load ch (add x (and (srl y c) Mask)))
1073 // to: (load ch (add x (shl (srl y d) d-c)))
1074 // where
1075 // Mask = 00..0 111..1 0.0
1076 // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1077 // | +-------- 1s
1078 // +-------------- at most c 0s
1079 // Motivating example:
1080 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1081 // to (add x (and (srl y 3) 1FFFFFFC))
1082 // which results in a constant-extended and(##...,lsr). This transformation
1083 // undoes this simplification for cases where the shl can be folded into
1084 // an addressing mode.
1085 void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1086  SelectionDAG &DAG = *CurDAG;
1087 
1088  for (SDNode *N : Nodes) {
1089  unsigned Opc = N->getOpcode();
1090  if (Opc != ISD::LOAD && Opc != ISD::STORE)
1091  continue;
1092  SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1093  // Addr must match: (add x T0)
1094  if (Addr.getOpcode() != ISD::ADD)
1095  continue;
1096  SDValue T0 = Addr.getOperand(1);
1097  // T0 must match: (and T1 Mask)
1098  if (T0.getOpcode() != ISD::AND)
1099  continue;
1100 
1101  // We have an AND.
1102  //
1103  // Check the first operand. It must be: (srl y c).
1104  SDValue S = T0.getOperand(0);
1105  if (S.getOpcode() != ISD::SRL)
1106  continue;
1108  if (SN == nullptr)
1109  continue;
1110  if (SN->getAPIntValue().getBitWidth() != 32)
1111  continue;
1112  uint32_t CV = SN->getZExtValue();
1113 
1114  // Check the second operand: the supposed mask.
1116  if (MN == nullptr)
1117  continue;
1118  if (MN->getAPIntValue().getBitWidth() != 32)
1119  continue;
1120  uint32_t Mask = MN->getZExtValue();
1121  // Examine the mask.
1122  uint32_t TZ = countTrailingZeros(Mask);
1123  uint32_t M1 = countTrailingOnes(Mask >> TZ);
1124  uint32_t LZ = countLeadingZeros(Mask);
1125  // Trailing zeros + middle ones + leading zeros must equal the width.
1126  if (TZ + M1 + LZ != 32)
1127  continue;
1128  // The number of trailing zeros will be encoded in the addressing mode.
1129  if (TZ > 2)
1130  continue;
1131  // The number of leading zeros must be at most c.
1132  if (LZ > CV)
1133  continue;
1134 
1135  // All looks good.
1136  SDValue Y = S.getOperand(0);
1137  EVT VT = Addr.getValueType();
1138  SDLoc dl(S);
1139  // TZ = D-C, so D = TZ+C.
1140  SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1141  SDValue DC = DAG.getConstant(TZ, dl, VT);
1142  SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1143  SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1144  ReplaceNode(T0.getNode(), NewShl.getNode());
1145  }
1146 }
1147 
1148 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1149 // (op ... 1 ...))
1150 void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1151  SelectionDAG &DAG = *CurDAG;
1152 
1153  for (SDNode *N : Nodes) {
1154  unsigned Opc = N->getOpcode();
1155  if (Opc != ISD::ZERO_EXTEND)
1156  continue;
1157  SDValue OpI1 = N->getOperand(0);
1158  EVT OpVT = OpI1.getValueType();
1159  if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1160  continue;
1161  for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
1162  SDNode *U = *I;
1163  if (U->getNumValues() != 1)
1164  continue;
1165  EVT UVT = U->getValueType(0);
1166  if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1167  continue;
1168  if (isMemOPCandidate(N, U))
1169  continue;
1170 
1171  // Potentially simplifiable operation.
1172  unsigned I1N = I.getOperandNo();
1174  for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1175  Ops[i] = U->getOperand(i);
1176  EVT BVT = Ops[I1N].getValueType();
1177 
1178  SDLoc dl(U);
1179  SDValue C0 = DAG.getConstant(0, dl, BVT);
1180  SDValue C1 = DAG.getConstant(1, dl, BVT);
1181  SDValue If0, If1;
1182 
1183  if (isa<MachineSDNode>(U)) {
1184  unsigned UseOpc = U->getMachineOpcode();
1185  Ops[I1N] = C0;
1186  If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1187  Ops[I1N] = C1;
1188  If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1189  } else {
1190  unsigned UseOpc = U->getOpcode();
1191  Ops[I1N] = C0;
1192  If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1193  Ops[I1N] = C1;
1194  If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1195  }
1196  SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
1197  DAG.ReplaceAllUsesWith(U, Sel.getNode());
1198  }
1199  }
1200 }
1201 
1203  // Repack all nodes before calling each preprocessing function,
1204  // because each of them can modify the set of nodes.
1205  auto getNodes = [this] () -> std::vector<SDNode*> {
1206  std::vector<SDNode*> T;
1207  T.reserve(CurDAG->allnodes_size());
1208  for (SDNode &N : CurDAG->allnodes())
1209  T.push_back(&N);
1210  return T;
1211  };
1212 
1213  // Transform: (or (select c x 0) z) -> (select c (or x z) z)
1214  // (or (select c 0 y) z) -> (select c z (or y z))
1215  ppSimplifyOrSelect0(getNodes());
1216 
1217  // Transform: (store ch val (add x (add (shl y c) e)))
1218  // to: (store ch val (add x (shl (add y d) c))),
1219  // where e = (shl d c) for some integer d.
1220  // The purpose of this is to enable generation of loads/stores with
1221  // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1222  // value c must be 0, 1 or 2.
1223  ppAddrReorderAddShl(getNodes());
1224 
1225  // Transform: (load ch (add x (and (srl y c) Mask)))
1226  // to: (load ch (add x (shl (srl y d) d-c)))
1227  // where
1228  // Mask = 00..0 111..1 0.0
1229  // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1230  // | +-------- 1s
1231  // +-------------- at most c 0s
1232  // Motivating example:
1233  // DAG combiner optimizes (add x (shl (srl y 5) 2))
1234  // to (add x (and (srl y 3) 1FFFFFFC))
1235  // which results in a constant-extended and(##...,lsr). This transformation
1236  // undoes this simplification for cases where the shl can be folded into
1237  // an addressing mode.
1238  ppAddrRewriteAndSrl(getNodes());
1239 
1240  // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1241  // (op ... 1 ...))
1242  ppHoistZextI1(getNodes());
1243 
1244  DEBUG_WITH_TYPE("isel", {
1245  dbgs() << "Preprocessed (Hexagon) selection DAG:";
1246  CurDAG->dump();
1247  });
1248 
1250  rebalanceAddressTrees();
1251 
1252  DEBUG_WITH_TYPE("isel", {
1253  dbgs() << "Address tree balanced selection DAG:";
1254  CurDAG->dump();
1255  });
1256  }
1257 }
1258 
1260  auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
1261  auto &HFI = *HST.getFrameLowering();
1262  if (!HFI.needsAligna(*MF))
1263  return;
1264 
1265  MachineFrameInfo &MFI = MF->getFrameInfo();
1266  MachineBasicBlock *EntryBB = &MF->front();
1267  unsigned AR = FuncInfo->CreateReg(MVT::i32);
1268  unsigned MaxA = MFI.getMaxAlignment();
1269  BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
1270  .addImm(MaxA);
1271  MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
1272 }
1273 
1274 // Match a frame index that can be used in an addressing mode.
1276  if (N.getOpcode() != ISD::FrameIndex)
1277  return false;
1278  auto &HFI = *HST->getFrameLowering();
1279  MachineFrameInfo &MFI = MF->getFrameInfo();
1280  int FX = cast<FrameIndexSDNode>(N)->getIndex();
1281  if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1282  return false;
1284  return true;
1285 }
1286 
1288  return SelectGlobalAddress(N, R, false, 0);
1289 }
1290 
1292  return SelectGlobalAddress(N, R, true, 0);
1293 }
1294 
1296  return SelectAnyImmediate(N, R, 0);
1297 }
1298 
1300  return SelectAnyImmediate(N, R, 0);
1301 }
1303  return SelectAnyImmediate(N, R, 1);
1304 }
1306  return SelectAnyImmediate(N, R, 2);
1307 }
1309  return SelectAnyImmediate(N, R, 3);
1310 }
1311 
1313  EVT T = N.getValueType();
1314  if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1315  return false;
1316  R = N;
1317  return true;
1318 }
1319 
1321  uint32_t LogAlign) {
1322  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1323  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1324  };
1325 
1326  switch (N.getOpcode()) {
1327  case ISD::Constant: {
1328  if (N.getValueType() != MVT::i32)
1329  return false;
1330  int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1331  if (!IsAligned(V))
1332  return false;
1333  R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1334  return true;
1335  }
1336  case HexagonISD::JT:
1337  case HexagonISD::CP:
1338  // These are assumed to always be aligned at least 8-byte boundary.
1339  if (LogAlign > 3)
1340  return false;
1341  R = N.getOperand(0);
1342  return true;
1343  case ISD::ExternalSymbol:
1344  // Symbols may be aligned at any boundary.
1345  if (LogAlign > 0)
1346  return false;
1347  R = N;
1348  return true;
1349  case ISD::BlockAddress:
1350  // Block address is always aligned at least 4-byte boundary.
1351  if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset()))
1352  return false;
1353  R = N;
1354  return true;
1355  }
1356 
1357  if (SelectGlobalAddress(N, R, false, LogAlign) ||
1358  SelectGlobalAddress(N, R, true, LogAlign))
1359  return true;
1360 
1361  return false;
1362 }
1363 
1365  bool UseGP, uint32_t LogAlign) {
1366  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1367  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1368  };
1369 
1370  switch (N.getOpcode()) {
1371  case ISD::ADD: {
1372  SDValue N0 = N.getOperand(0);
1373  SDValue N1 = N.getOperand(1);
1374  unsigned GAOpc = N0.getOpcode();
1375  if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1376  return false;
1377  if (!UseGP && GAOpc != HexagonISD::CONST32)
1378  return false;
1379  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1380  SDValue Addr = N0.getOperand(0);
1381  // For the purpose of alignment, sextvalue and zextvalue are the same.
1382  if (!IsAligned(Const->getZExtValue()))
1383  return false;
1384  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1385  if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1386  uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1387  R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1388  N.getValueType(), NewOff);
1389  return true;
1390  }
1391  }
1392  }
1393  break;
1394  }
1395  case HexagonISD::CP:
1396  case HexagonISD::JT:
1397  case HexagonISD::CONST32:
1398  // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1399  // want in the instruction.
1400  if (!UseGP)
1401  R = N.getOperand(0);
1402  return !UseGP;
1404  if (UseGP)
1405  R = N.getOperand(0);
1406  return UseGP;
1407  default:
1408  return false;
1409  }
1410 
1411  return false;
1412 }
1413 
1415  // This (complex pattern) function is meant to detect a sign-extension
1416  // i32->i64 on a per-operand basis. This would allow writing single
1417  // patterns that would cover a number of combinations of different ways
1418  // a sign-extensions could be written. For example:
1419  // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1420  // could match either one of these:
1421  // (mul (sext x) (sext_inreg y))
1422  // (mul (sext-load *p) (sext_inreg y))
1423  // (mul (sext_inreg x) (sext y))
1424  // etc.
1425  //
1426  // The returned value will have type i64 and its low word will
1427  // contain the value being extended. The high bits are not specified.
1428  // The returned type is i64 because the original type of N was i64,
1429  // but the users of this function should only use the low-word of the
1430  // result, e.g.
1431  // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1432 
1433  if (N.getValueType() != MVT::i64)
1434  return false;
1435  unsigned Opc = N.getOpcode();
1436  switch (Opc) {
1437  case ISD::SIGN_EXTEND:
1438  case ISD::SIGN_EXTEND_INREG: {
1439  // sext_inreg has the source type as a separate operand.
1440  EVT T = Opc == ISD::SIGN_EXTEND
1441  ? N.getOperand(0).getValueType()
1442  : cast<VTSDNode>(N.getOperand(1))->getVT();
1443  unsigned SW = T.getSizeInBits();
1444  if (SW == 32)
1445  R = N.getOperand(0);
1446  else if (SW < 32)
1447  R = N;
1448  else
1449  return false;
1450  break;
1451  }
1452  case ISD::LOAD: {
1453  LoadSDNode *L = cast<LoadSDNode>(N);
1454  if (L->getExtensionType() != ISD::SEXTLOAD)
1455  return false;
1456  // All extending loads extend to i32, so even if the value in
1457  // memory is shorter than 32 bits, it will be i32 after the load.
1458  if (L->getMemoryVT().getSizeInBits() > 32)
1459  return false;
1460  R = N;
1461  break;
1462  }
1463  case ISD::SRA: {
1464  auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1465  if (!S || S->getZExtValue() != 32)
1466  return false;
1467  R = N;
1468  break;
1469  }
1470  default:
1471  return false;
1472  }
1473  EVT RT = R.getValueType();
1474  if (RT == MVT::i64)
1475  return true;
1476  assert(RT == MVT::i32);
1477  // This is only to produce a value of type i64. Do not rely on the
1478  // high bits produced by this.
1479  const SDLoc &dl(N);
1480  SDValue Ops[] = {
1481  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1482  R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1483  R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1484  };
1485  SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1486  MVT::i64, Ops);
1487  R = SDValue(T, 0);
1488  return true;
1489 }
1490 
1491 bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1492  SDValue &Src) {
1493  unsigned Opc = Val.getOpcode();
1494  switch (Opc) {
1495  case ISD::SIGN_EXTEND:
1496  case ISD::ZERO_EXTEND:
1497  case ISD::ANY_EXTEND: {
1498  const SDValue &Op0 = Val.getOperand(0);
1499  EVT T = Op0.getValueType();
1500  if (T.isInteger() && T.getSizeInBits() == NumBits) {
1501  Src = Op0;
1502  return true;
1503  }
1504  break;
1505  }
1507  case ISD::AssertSext:
1508  case ISD::AssertZext:
1509  if (Val.getOperand(0).getValueType().isInteger()) {
1510  VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1511  if (T->getVT().getSizeInBits() == NumBits) {
1512  Src = Val.getOperand(0);
1513  return true;
1514  }
1515  }
1516  break;
1517  case ISD::AND: {
1518  // Check if this is an AND with NumBits of lower bits set to 1.
1519  uint64_t Mask = (1 << NumBits) - 1;
1520  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1521  if (C->getZExtValue() == Mask) {
1522  Src = Val.getOperand(1);
1523  return true;
1524  }
1525  }
1526  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1527  if (C->getZExtValue() == Mask) {
1528  Src = Val.getOperand(0);
1529  return true;
1530  }
1531  }
1532  break;
1533  }
1534  case ISD::OR:
1535  case ISD::XOR: {
1536  // OR/XOR with the lower NumBits bits set to 0.
1537  uint64_t Mask = (1 << NumBits) - 1;
1538  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1539  if ((C->getZExtValue() & Mask) == 0) {
1540  Src = Val.getOperand(1);
1541  return true;
1542  }
1543  }
1544  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1545  if ((C->getZExtValue() & Mask) == 0) {
1546  Src = Val.getOperand(0);
1547  return true;
1548  }
1549  }
1550  }
1551  default:
1552  break;
1553  }
1554  return false;
1555 }
1556 
1557 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1558  return N->getAlignment() >= N->getMemoryVT().getStoreSize();
1559 }
1560 
1561 bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1562  unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1563  switch (N->getMemoryVT().getStoreSize()) {
1564  case 1:
1565  return StackSize <= 56; // 1*2^6 - 8
1566  case 2:
1567  return StackSize <= 120; // 2*2^6 - 8
1568  case 4:
1569  return StackSize <= 248; // 4*2^6 - 8
1570  default:
1571  return false;
1572  }
1573 }
1574 
1575 // Return true when the given node fits in a positive half word.
1576 bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1577  if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1578  int64_t V = CN->getSExtValue();
1579  return V > 0 && isInt<16>(V);
1580  }
1581  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1582  const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1583  return VN->getVT().getSizeInBits() <= 16;
1584  }
1585  return false;
1586 }
1587 
1588 bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1589  return !CheckSingleUse || N->hasOneUse();
1590 }
1591 
1592 ////////////////////////////////////////////////////////////////////////////////
1593 // Rebalancing of address calculation trees
1594 
1595 static bool isOpcodeHandled(const SDNode *N) {
1596  switch (N->getOpcode()) {
1597  case ISD::ADD:
1598  case ISD::MUL:
1599  return true;
1600  case ISD::SHL:
1601  // We only handle constant shifts because these can be easily flattened
1602  // into multiplications by 2^Op1.
1603  return isa<ConstantSDNode>(N->getOperand(1).getNode());
1604  default:
1605  return false;
1606  }
1607 }
1608 
1609 /// Return the weight of an SDNode
1610 int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1611  if (!isOpcodeHandled(N))
1612  return 1;
1613  assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1614  assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1615  assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1616  return RootWeights[N];
1617 }
1618 
1619 int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1620  if (!isOpcodeHandled(N))
1621  return 0;
1622  assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1623  "Cannot query height of unvisited/RAUW'd node!");
1624  return RootHeights[N];
1625 }
1626 
1627 namespace {
1628 struct WeightedLeaf {
1629  SDValue Value;
1630  int Weight;
1631  int InsertionOrder;
1632 
1633  WeightedLeaf() : Value(SDValue()) { }
1634 
1635  WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1636  Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1637  assert(Weight >= 0 && "Weight must be >= 0");
1638  }
1639 
1640  static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1641  assert(A.Value.getNode() && B.Value.getNode());
1642  return A.Weight == B.Weight ?
1643  (A.InsertionOrder > B.InsertionOrder) :
1644  (A.Weight > B.Weight);
1645  }
1646 };
1647 
1648 /// A specialized priority queue for WeigthedLeaves. It automatically folds
1649 /// constants and allows removal of non-top elements while maintaining the
1650 /// priority order.
1651 class LeafPrioQueue {
1653  bool HaveConst;
1654  WeightedLeaf ConstElt;
1655  unsigned Opcode;
1656 
1657 public:
1658  bool empty() {
1659  return (!HaveConst && Q.empty());
1660  }
1661 
1662  size_t size() {
1663  return Q.size() + HaveConst;
1664  }
1665 
1666  bool hasConst() {
1667  return HaveConst;
1668  }
1669 
1670  const WeightedLeaf &top() {
1671  if (HaveConst)
1672  return ConstElt;
1673  return Q.front();
1674  }
1675 
1676  WeightedLeaf pop() {
1677  if (HaveConst) {
1678  HaveConst = false;
1679  return ConstElt;
1680  }
1681  std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1682  return Q.pop_back_val();
1683  }
1684 
1685  void push(WeightedLeaf L, bool SeparateConst=true) {
1686  if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1687  if (Opcode == ISD::MUL &&
1688  cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1689  return;
1690  if (Opcode == ISD::ADD &&
1691  cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1692  return;
1693 
1694  HaveConst = true;
1695  ConstElt = L;
1696  } else {
1697  Q.push_back(L);
1698  std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1699  }
1700  }
1701 
1702  /// Push L to the bottom of the queue regardless of its weight. If L is
1703  /// constant, it will not be folded with other constants in the queue.
1704  void pushToBottom(WeightedLeaf L) {
1705  L.Weight = 1000;
1706  push(L, false);
1707  }
1708 
1709  /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1710  /// lowest weight and remove it from the queue.
1711  WeightedLeaf findSHL(uint64_t MaxAmount);
1712 
1713  WeightedLeaf findMULbyConst();
1714 
1715  LeafPrioQueue(unsigned Opcode) :
1716  HaveConst(false), Opcode(Opcode) { }
1717 };
1718 } // end anonymous namespace
1719 
1720 WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1721  int ResultPos;
1722  WeightedLeaf Result;
1723 
1724  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1725  const WeightedLeaf &L = Q[Pos];
1726  const SDValue &Val = L.Value;
1727  if (Val.getOpcode() != ISD::SHL ||
1728  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1729  Val.getConstantOperandVal(1) > MaxAmount)
1730  continue;
1731  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1732  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1733  {
1734  Result = L;
1735  ResultPos = Pos;
1736  }
1737  }
1738 
1739  if (Result.Value.getNode()) {
1740  Q.erase(&Q[ResultPos]);
1741  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1742  }
1743 
1744  return Result;
1745 }
1746 
1747 WeightedLeaf LeafPrioQueue::findMULbyConst() {
1748  int ResultPos;
1749  WeightedLeaf Result;
1750 
1751  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1752  const WeightedLeaf &L = Q[Pos];
1753  const SDValue &Val = L.Value;
1754  if (Val.getOpcode() != ISD::MUL ||
1755  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1756  Val.getConstantOperandVal(1) > 127)
1757  continue;
1758  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1759  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1760  {
1761  Result = L;
1762  ResultPos = Pos;
1763  }
1764  }
1765 
1766  if (Result.Value.getNode()) {
1767  Q.erase(&Q[ResultPos]);
1768  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1769  }
1770 
1771  return Result;
1772 }
1773 
1774 SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1775  uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1776  return CurDAG->getConstant(MulFactor, SDLoc(N),
1777  N->getOperand(1).getValueType());
1778 }
1779 
1780 /// @returns the value x for which 2^x is a factor of Val
1781 static unsigned getPowerOf2Factor(SDValue Val) {
1782  if (Val.getOpcode() == ISD::MUL) {
1783  unsigned MaxFactor = 0;
1784  for (int i = 0; i < 2; ++i) {
1786  if (!C)
1787  continue;
1788  const APInt &CInt = C->getAPIntValue();
1789  if (CInt.getBoolValue())
1790  MaxFactor = CInt.countTrailingZeros();
1791  }
1792  return MaxFactor;
1793  }
1794  if (Val.getOpcode() == ISD::SHL) {
1795  if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1796  return 0;
1797  return (unsigned) Val.getConstantOperandVal(1);
1798  }
1799 
1800  return 0;
1801 }
1802 
1803 /// @returns true if V>>Amount will eliminate V's operation on its child
1804 static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1805  if (V.getOpcode() == ISD::MUL) {
1806  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1807  for (int i = 0; i < 2; ++i)
1808  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1809  V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1810  uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1811  return (NewConst == 1);
1812  }
1813  } else if (V.getOpcode() == ISD::SHL) {
1814  return (Amount == V.getConstantOperandVal(1));
1815  }
1816 
1817  return false;
1818 }
1819 
1820 SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1821  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1822  if (V.getOpcode() == ISD::MUL) {
1823  for (int i=0; i < 2; ++i) {
1824  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1825  V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
1826  uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
1827  if (NewConst == 1)
1828  return Ops[!i];
1829  Ops[i] = CurDAG->getConstant(NewConst,
1830  SDLoc(V), V.getValueType());
1831  break;
1832  }
1833  }
1834  } else if (V.getOpcode() == ISD::SHL) {
1835  uint64_t ShiftAmount = V.getConstantOperandVal(1);
1836  if (ShiftAmount == Power)
1837  return Ops[0];
1838  Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
1839  SDLoc(V), V.getValueType());
1840  }
1841 
1842  return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
1843 }
1844 
1845 static bool isTargetConstant(const SDValue &V) {
1846  return V.getOpcode() == HexagonISD::CONST32 ||
1848 }
1849 
1850 unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
1851  if (GAUsesInFunction.count(V))
1852  return GAUsesInFunction[V];
1853 
1854  unsigned Result = 0;
1855  const Function &CurF = CurDAG->getMachineFunction().getFunction();
1856  for (const User *U : V->users()) {
1857  if (isa<Instruction>(U) &&
1858  cast<Instruction>(U)->getParent()->getParent() == &CurF)
1859  ++Result;
1860  }
1861 
1862  GAUsesInFunction[V] = Result;
1863 
1864  return Result;
1865 }
1866 
1867 /// Note - After calling this, N may be dead. It may have been replaced by a
1868 /// new node, so always use the returned value in place of N.
1869 ///
1870 /// @returns The SDValue taking the place of N (which could be N if it is
1871 /// unchanged)
1872 SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
1873  assert(RootWeights.count(N) && "Cannot balance non-root node.");
1874  assert(RootWeights[N] != -2 && "This node was RAUW'd!");
1875  assert(!TopLevel || N->getOpcode() == ISD::ADD);
1876 
1877  // Return early if this node was already visited
1878  if (RootWeights[N] != -1)
1879  return SDValue(N, 0);
1880 
1881  assert(isOpcodeHandled(N));
1882 
1883  SDValue Op0 = N->getOperand(0);
1884  SDValue Op1 = N->getOperand(1);
1885 
1886  // Return early if the operands will remain unchanged or are all roots
1887  if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
1888  (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
1889  SDNode *Op0N = Op0.getNode();
1890  int Weight;
1891  if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
1892  Weight = getWeight(balanceSubTree(Op0N).getNode());
1893  // Weight = calculateWeight(Op0N);
1894  } else
1895  Weight = getWeight(Op0N);
1896 
1897  SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
1898  if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
1899  Weight += getWeight(balanceSubTree(Op1N).getNode());
1900  // Weight += calculateWeight(Op1N);
1901  } else
1902  Weight += getWeight(Op1N);
1903 
1904  RootWeights[N] = Weight;
1905  RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
1906  getHeight(N->getOperand(1).getNode())) + 1;
1907 
1908  LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
1909  << " Height=" << RootHeights[N] << "): ");
1910  LLVM_DEBUG(N->dump(CurDAG));
1911 
1912  return SDValue(N, 0);
1913  }
1914 
1915  LLVM_DEBUG(dbgs() << "** Balancing root node: ");
1916  LLVM_DEBUG(N->dump(CurDAG));
1917 
1918  unsigned NOpcode = N->getOpcode();
1919 
1920  LeafPrioQueue Leaves(NOpcode);
1921  SmallVector<SDValue, 4> Worklist;
1922  Worklist.push_back(SDValue(N, 0));
1923 
1924  // SHL nodes will be converted to MUL nodes
1925  if (NOpcode == ISD::SHL)
1926  NOpcode = ISD::MUL;
1927 
1928  bool CanFactorize = false;
1929  WeightedLeaf Mul1, Mul2;
1930  unsigned MaxPowerOf2 = 0;
1931  WeightedLeaf GA;
1932 
1933  // Do not try to factor out a shift if there is already a shift at the tip of
1934  // the tree.
1935  bool HaveTopLevelShift = false;
1936  if (TopLevel &&
1937  ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
1938  Op0.getConstantOperandVal(1) < 4) ||
1939  (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
1940  Op1.getConstantOperandVal(1) < 4)))
1941  HaveTopLevelShift = true;
1942 
1943  // Flatten the subtree into an ordered list of leaves; at the same time
1944  // determine whether the tree is already balanced.
1945  int InsertionOrder = 0;
1946  SmallDenseMap<SDValue, int> NodeHeights;
1947  bool Imbalanced = false;
1948  int CurrentWeight = 0;
1949  while (!Worklist.empty()) {
1950  SDValue Child = Worklist.pop_back_val();
1951 
1952  if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
1953  // CASE 1: Child is a root note
1954 
1955  int Weight = RootWeights[Child.getNode()];
1956  if (Weight == -1) {
1957  Child = balanceSubTree(Child.getNode());
1958  // calculateWeight(Child.getNode());
1959  Weight = getWeight(Child.getNode());
1960  } else if (Weight == -2) {
1961  // Whoops, this node was RAUWd by one of the balanceSubTree calls we
1962  // made. Our worklist isn't up to date anymore.
1963  // Restart the whole process.
1964  LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
1965  return balanceSubTree(N, TopLevel);
1966  }
1967 
1968  NodeHeights[Child] = 1;
1969  CurrentWeight += Weight;
1970 
1971  unsigned PowerOf2;
1972  if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
1973  (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
1974  Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
1975  // Try to identify two factorizable MUL/SHL children greedily. Leave
1976  // them out of the priority queue for now so we can deal with them
1977  // after.
1978  if (!Mul1.Value.getNode()) {
1979  Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
1980  MaxPowerOf2 = PowerOf2;
1981  } else {
1982  Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
1983  MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
1984 
1985  // Our addressing modes can only shift by a maximum of 3
1986  if (MaxPowerOf2 > 3)
1987  MaxPowerOf2 = 3;
1988 
1989  CanFactorize = true;
1990  }
1991  } else
1992  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
1993  } else if (!isOpcodeHandled(Child.getNode())) {
1994  // CASE 2: Child is an unhandled kind of node (e.g. constant)
1995  int Weight = getWeight(Child.getNode());
1996 
1997  NodeHeights[Child] = getHeight(Child.getNode());
1998  CurrentWeight += Weight;
1999 
2000  if (isTargetConstant(Child) && !GA.Value.getNode())
2001  GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2002  else
2003  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2004  } else {
2005  // CASE 3: Child is a subtree of same opcode
2006  // Visit children first, then flatten.
2007  unsigned ChildOpcode = Child.getOpcode();
2008  assert(ChildOpcode == NOpcode ||
2009  (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2010 
2011  // Convert SHL to MUL
2012  SDValue Op1;
2013  if (ChildOpcode == ISD::SHL)
2014  Op1 = getMultiplierForSHL(Child.getNode());
2015  else
2016  Op1 = Child->getOperand(1);
2017 
2018  if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2019  assert(!NodeHeights.count(Child) && "Parent visited before children?");
2020  // Visit children first, then re-visit this node
2021  Worklist.push_back(Child);
2022  Worklist.push_back(Op1);
2023  Worklist.push_back(Child->getOperand(0));
2024  } else {
2025  // Back at this node after visiting the children
2026  if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2027  Imbalanced = true;
2028 
2029  NodeHeights[Child] = std::max(NodeHeights[Op1],
2030  NodeHeights[Child->getOperand(0)]) + 1;
2031  }
2032  }
2033  }
2034 
2035  LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2036  << " weight=" << CurrentWeight
2037  << " imbalanced=" << Imbalanced << "\n");
2038 
2039  // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2040  // This factors out a shift in order to match memw(a<<Y+b).
2041  if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2042  willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2043  LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2044  int Weight = Mul1.Weight + Mul2.Weight;
2045  int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2046  SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2047  SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2048  SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2049  Mul1Factored, Mul2Factored);
2050  SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2051  Mul1.Value.getValueType());
2052  SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2053  Sum, Const);
2054  NodeHeights[New] = Height;
2055  Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2056  } else if (Mul1.Value.getNode()) {
2057  // We failed to factorize two MULs, so now the Muls are left outside the
2058  // queue... add them back.
2059  Leaves.push(Mul1);
2060  if (Mul2.Value.getNode())
2061  Leaves.push(Mul2);
2062  CanFactorize = false;
2063  }
2064 
2065  // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2066  // and the root node itself is not used more than twice. This reduces the
2067  // amount of additional constant extenders introduced by this optimization.
2068  bool CombinedGA = false;
2069  if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2070  GA.Value.hasOneUse() && N->use_size() < 3) {
2071  GlobalAddressSDNode *GANode =
2072  cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2073  ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2074 
2075  if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2077  LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
2078  << Offset->getSExtValue() << "): ");
2079  LLVM_DEBUG(GANode->dump(CurDAG));
2080 
2081  SDValue NewTGA =
2082  CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2083  GANode->getValueType(0),
2084  GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2085  GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2086  GA.Value.getValueType(), NewTGA);
2087  GA.Weight += Leaves.top().Weight;
2088 
2089  NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2090  CombinedGA = true;
2091 
2092  Leaves.pop(); // Remove the offset constant from the queue
2093  }
2094  }
2095 
2096  if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2097  (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2098  RootWeights[N] = CurrentWeight;
2099  RootHeights[N] = NodeHeights[SDValue(N, 0)];
2100 
2101  return SDValue(N, 0);
2102  }
2103 
2104  // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2105  if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2106  WeightedLeaf SHL = Leaves.findSHL(31);
2107  if (SHL.Value.getNode()) {
2108  int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2109  GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2110  GA.Value.getValueType(),
2111  GA.Value, SHL.Value);
2112  GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2113  NodeHeights[GA.Value] = Height;
2114  }
2115  }
2116 
2117  if (GA.Value.getNode())
2118  Leaves.push(GA);
2119 
2120  // If this is the top level and we haven't factored out a shift, we should try
2121  // to move a constant to the bottom to match addressing modes like memw(rX+C)
2122  if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2123  LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2124  Leaves.pushToBottom(Leaves.pop());
2125  }
2126 
2127  const DataLayout &DL = CurDAG->getDataLayout();
2128  const TargetLowering &TLI = *getTargetLowering();
2129 
2130  // Rebuild the tree using Huffman's algorithm
2131  while (Leaves.size() > 1) {
2132  WeightedLeaf L0 = Leaves.pop();
2133 
2134  // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2135  // otherwise just get the next leaf
2136  WeightedLeaf L1 = Leaves.findMULbyConst();
2137  if (!L1.Value.getNode())
2138  L1 = Leaves.pop();
2139 
2140  assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2141 
2142  SDValue V0 = L0.Value;
2143  int V0Weight = L0.Weight;
2144  SDValue V1 = L1.Value;
2145  int V1Weight = L1.Weight;
2146 
2147  // Make sure that none of these nodes have been RAUW'd
2148  if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2149  (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2150  LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2151  return balanceSubTree(N, TopLevel);
2152  }
2153 
2156  EVT VT = N->getValueType(0);
2157  SDValue NewNode;
2158 
2159  if (V0C && !V1C) {
2160  std::swap(V0, V1);
2161  std::swap(V0C, V1C);
2162  }
2163 
2164  // Calculate height of this node
2165  assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2166  "Children must have been visited before re-combining them!");
2167  int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2168 
2169  // Rebuild this node (and restore SHL from MUL if needed)
2170  if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2171  NewNode = CurDAG->getNode(
2172  ISD::SHL, SDLoc(V0), VT, V0,
2174  V1C->getAPIntValue().logBase2(), SDLoc(N),
2175  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2176  else
2177  NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2178 
2179  NodeHeights[NewNode] = Height;
2180 
2181  int Weight = V0Weight + V1Weight;
2182  Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2183 
2184  LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
2185  << ",Height=" << Height << "):\n");
2186  LLVM_DEBUG(NewNode.dump());
2187  }
2188 
2189  assert(Leaves.size() == 1);
2190  SDValue NewRoot = Leaves.top().Value;
2191 
2192  assert(NodeHeights.count(NewRoot));
2193  int Height = NodeHeights[NewRoot];
2194 
2195  // Restore SHL if we earlier converted it to a MUL
2196  if (NewRoot.getOpcode() == ISD::MUL) {
2197  ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2198  if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2199  EVT VT = NewRoot.getValueType();
2200  SDValue V0 = NewRoot.getOperand(0);
2201  NewRoot = CurDAG->getNode(
2202  ISD::SHL, SDLoc(NewRoot), VT, V0,
2204  V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2205  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2206  }
2207  }
2208 
2209  if (N != NewRoot.getNode()) {
2210  LLVM_DEBUG(dbgs() << "--> Root is now: ");
2211  LLVM_DEBUG(NewRoot.dump());
2212 
2213  // Replace all uses of old root by new root
2214  CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2215  // Mark that we have RAUW'd N
2216  RootWeights[N] = -2;
2217  } else {
2218  LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
2219  }
2220 
2221  RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2222  RootHeights[NewRoot.getNode()] = Height;
2223 
2224  return NewRoot;
2225 }
2226 
2227 void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2228  for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) {
2229  SDNode *N = &*I++;
2230  if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2231  continue;
2232 
2233  SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2234  if (BasePtr.getOpcode() != ISD::ADD)
2235  continue;
2236 
2237  // We've already processed this node
2238  if (RootWeights.count(BasePtr.getNode()))
2239  continue;
2240 
2241  LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2242  LLVM_DEBUG(N->dump(CurDAG));
2243 
2244  // FindRoots
2245  SmallVector<SDNode *, 4> Worklist;
2246 
2247  Worklist.push_back(BasePtr.getOperand(0).getNode());
2248  Worklist.push_back(BasePtr.getOperand(1).getNode());
2249 
2250  while (!Worklist.empty()) {
2251  SDNode *N = Worklist.pop_back_val();
2252  unsigned Opcode = N->getOpcode();
2253 
2254  if (!isOpcodeHandled(N))
2255  continue;
2256 
2257  Worklist.push_back(N->getOperand(0).getNode());
2258  Worklist.push_back(N->getOperand(1).getNode());
2259 
2260  // Not a root if it has only one use and same opcode as its parent
2261  if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
2262  continue;
2263 
2264  // This root node has already been processed
2265  if (RootWeights.count(N))
2266  continue;
2267 
2268  RootWeights[N] = -1;
2269  }
2270 
2271  // Balance node itself
2272  RootWeights[BasePtr.getNode()] = -1;
2273  SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2274 
2275  if (N->getOpcode() == ISD::LOAD)
2276  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2277  NewBasePtr, N->getOperand(2));
2278  else
2279  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2280  NewBasePtr, N->getOperand(3));
2281 
2282  LLVM_DEBUG(dbgs() << "--> Final node: ");
2283  LLVM_DEBUG(N->dump(CurDAG));
2284  }
2285 
2287  GAUsesInFunction.clear();
2288  RootHeights.clear();
2289  RootWeights.clear();
2290 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
uint64_t CallInst * C
static MVT getIntegerVT(unsigned BitWidth)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOffset() const
const GlobalValue * getGlobal() const
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static cl::opt< bool > CheckSingleUse("hexagon-isel-su", cl::Hidden, cl::init(true), cl::desc("Enable checking of SDNode's single-use status"))
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
static bool isMemOPCandidate(SDNode *I, SDNode *U)
static bool willShiftRightEliminate(SDValue V, unsigned Amount)
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:351
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
const SDValue & getBasePtr() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool DetectUseSxtw(SDValue &N, SDValue &R)
const SDValue & getValue() const
SDVTList getVTList() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
bool SelectAddrFI(SDValue &N, SDValue &R)
const SDValue & getChain() const
static unsigned getPowerOf2Factor(SDValue Val)
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
unsigned getAlignment() const
Hexagon target-specific information for each MachineFunction.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:306
A debug info location.
Definition: DebugLoc.h:34
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
const HexagonFrameLowering * getFrameLowering() const override
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
MachineFunction * MF
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1503
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
bool isTruncatingStore() const
Return true if the op does a truncation before store.
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:65
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1626
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
static ManagedStatic< DebugCounter > DC
static cl::opt< bool > RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if this allows optimizations"))
Shift and rotation operations.
Definition: ISDOpcodes.h:390
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:478
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
const TargetLowering * TLI
static bool isTargetConstant(const SDValue &V)
uint64_t getConstantOperandVal(unsigned i) const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl)
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:457
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:398
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT&#39;s, which are used to parameterize some operations.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:446
unsigned getSizeInBits() const
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:478
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:395
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:625
#define T
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:573
bool SelectNewCircIntrinsic(SDNode *IntN)
Generate a machine instruction node for the new circlar buffer intrinsics.
static cl::opt< bool > EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), cl::desc("Rebalance address calculation trees to improve " "instruction selection"))
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
bool SelectBrevLdIntrinsic(SDNode *IntN)
SDNode * StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN)
This class is used to represent ISD::STORE nodes.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:120
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool SelectAnyImm0(SDValue &N, SDValue &R)
bool SelectAnyInt(SDValue &N, SDValue &R)
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:912
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions...
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
const SDValue & getOffset() const
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool isValidAutoIncImm(const EVT VT, const int Offset) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using &#39;From&#39; to use &#39;To&#39; instead.
const APInt & getAPIntValue() const
bool SelectAnyImm2(SDValue &N, SDValue &R)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Extended Value Type.
Definition: ValueTypes.h:34
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl)
const MachineBasicBlock & front() const
size_t size() const
Definition: SmallVector.h:53
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isOpcodeHandled(const SDNode *N)
void dump() const
Dump this node, for debugging.
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
bool SelectAddrGA(SDValue &N, SDValue &R)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1023
BlockVerifier::State From
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:442
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, uint32_t LogAlign)
unsigned estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
void dump() const
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:900
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const
EVT is not used in-tree, but is used by out-of-tree target.
An SDNode that represents everything that will be needed to construct a MachineInstr.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
CHAIN = SC CHAIN, Imm128 - System call.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Represents one node in the SelectionDAG.
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:434
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool SelectAnyImmediate(SDValue &N, SDValue &R, uint32_t LogAlign)
unsigned logBase2() const
Definition: APInt.h:1742
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
size_t use_size() const
Return the number of uses of this node.
EVT getMemoryVT() const
Return the type of the in-memory value.
Class for arbitrary precision integers.
Definition: APInt.h:70
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:400
bool isPowerOf2() const
Check if this APInt&#39;s value is a power of two greater than zero.
Definition: APInt.h:464
iterator_range< user_iterator > users()
Definition: Value.h:400
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:448
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:451
static cl::opt< bool > RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"))
bool SelectAnyImm3(SDValue &N, SDValue &R)
bool SelectAnyImm1(SDValue &N, SDValue &R)
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:435
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
const SDValue & getValue() const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:371
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:702
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
bool SelectAddrGP(SDValue &N, SDValue &R)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:466
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:592
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
uint32_t Size
Definition: Profile.cpp:47
bool tryLoadOfLoadIntrinsic(LoadSDNode *N)
FunctionPass * createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel)
unsigned getOpcode() const
unsigned CreateReg(MVT VT)
CreateReg - Allocate a single virtual register for the given type.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:146
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
const TargetLowering * getTargetLowering() const
bool SelectAnyImm(SDValue &N, SDValue &R)
This class is used to form a handle around another node that is persistent and is updated across invo...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:566
LLVM Value Representation.
Definition: Value.h:73
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
static const Function * getParent(const Value *V)
bool isNonTemporal() const
const APFloat & getValueAPF() const
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
APInt bitcastToAPInt() const
Definition: APFloat.h:1094
Conversion operators.
Definition: ISDOpcodes.h:445
const SDValue & getOperand(unsigned i) const
uint64_t getZExtValue() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
MachineSDNode * LoadInstrForLoadIntrinsic(SDNode *IntN)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
#define LLVM_DEBUG(X)
Definition: Debug.h:123
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand *> NewMemRefs)
Mutate the specified machine node&#39;s memory references to the provided list.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
FunctionLoweringInfo * FuncInfo
#define T1
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:619
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:892
This class is used to represent ISD::LOAD nodes.