LLVM  7.0.0svn
HexagonISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines an instruction selector for the Hexagon target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "Hexagon.h"
15 #include "HexagonISelDAGToDAG.h"
16 #include "HexagonISelLowering.h"
18 #include "HexagonTargetMachine.h"
22 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/Support/Debug.h"
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 
29 static
31 EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
32  cl::desc("Rebalance address calculation trees to improve "
33  "instruction selection"));
34 
35 // Rebalance only if this allows e.g. combining a GA with an offset or
36 // factoring out a shift.
37 static
39 RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
40  cl::desc("Rebalance address tree only if this allows optimizations"));
41 
42 static
44 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
45  cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
46 
47 static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
48  cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
49 
50 //===----------------------------------------------------------------------===//
51 // Instruction Selector Implementation
52 //===----------------------------------------------------------------------===//
53 
54 #define GET_DAGISEL_BODY HexagonDAGToDAGISel
55 #include "HexagonGenDAGISel.inc"
56 
57 /// createHexagonISelDag - This pass converts a legalized DAG into a
58 /// Hexagon-specific DAG, ready for instruction scheduling.
59 ///
60 namespace llvm {
62  CodeGenOpt::Level OptLevel) {
63  return new HexagonDAGToDAGISel(TM, OptLevel);
64 }
65 }
66 
68  SDValue Chain = LD->getChain();
69  SDValue Base = LD->getBasePtr();
70  SDValue Offset = LD->getOffset();
71  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
72  EVT LoadedVT = LD->getMemoryVT();
73  unsigned Opcode = 0;
74 
75  // Check for zero extended loads. Treat any-extend loads as zero extended
76  // loads.
77  ISD::LoadExtType ExtType = LD->getExtensionType();
78  bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
79  bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
80 
81  assert(LoadedVT.isSimple());
82  switch (LoadedVT.getSimpleVT().SimpleTy) {
83  case MVT::i8:
84  if (IsZeroExt)
85  Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
86  else
87  Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
88  break;
89  case MVT::i16:
90  if (IsZeroExt)
91  Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
92  else
93  Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
94  break;
95  case MVT::i32:
96  case MVT::f32:
97  case MVT::v2i16:
98  case MVT::v4i8:
99  Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
100  break;
101  case MVT::i64:
102  case MVT::f64:
103  case MVT::v2i32:
104  case MVT::v4i16:
105  case MVT::v8i8:
106  Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
107  break;
108  case MVT::v64i8:
109  case MVT::v32i16:
110  case MVT::v16i32:
111  case MVT::v8i64:
112  case MVT::v128i8:
113  case MVT::v64i16:
114  case MVT::v32i32:
115  case MVT::v16i64:
116  if (isAlignedMemNode(LD)) {
117  if (LD->isNonTemporal())
118  Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
119  else
120  Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
121  } else {
122  Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
123  }
124  break;
125  default:
126  llvm_unreachable("Unexpected memory type in indexed load");
127  }
128 
129  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
131  MemOp[0] = LD->getMemOperand();
132 
133  auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
134  -> MachineSDNode* {
135  if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
136  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
137  return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
138  Zero, SDValue(N, 0));
139  }
140  if (ExtType == ISD::SEXTLOAD)
141  return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
142  SDValue(N, 0));
143  return N;
144  };
145 
146  // Loaded value Next address Chain
147  SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
148  SDValue To[3];
149 
150  EVT ValueVT = LD->getValueType(0);
151  if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
152  // A load extending to i64 will actually produce i32, which will then
153  // need to be extended to i64.
154  assert(LoadedVT.getSizeInBits() <= 32);
155  ValueVT = MVT::i32;
156  }
157 
158  if (IsValidInc) {
159  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
160  MVT::i32, MVT::Other, Base,
161  IncV, Chain);
162  L->setMemRefs(MemOp, MemOp+1);
163  To[1] = SDValue(L, 1); // Next address.
164  To[2] = SDValue(L, 2); // Chain.
165  // Handle special case for extension to i64.
166  if (LD->getValueType(0) == MVT::i64)
167  L = getExt64(L, dl);
168  To[0] = SDValue(L, 0); // Loaded (extended) value.
169  } else {
170  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
171  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
172  Base, Zero, Chain);
173  L->setMemRefs(MemOp, MemOp+1);
174  To[2] = SDValue(L, 1); // Chain.
175  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
176  Base, IncV);
177  To[1] = SDValue(A, 0); // Next address.
178  // Handle special case for extension to i64.
179  if (LD->getValueType(0) == MVT::i64)
180  L = getExt64(L, dl);
181  To[0] = SDValue(L, 0); // Loaded (extended) value.
182  }
183  ReplaceUses(From, To, 3);
184  CurDAG->RemoveDeadNode(LD);
185 }
186 
188  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
189  return nullptr;
190 
191  SDLoc dl(IntN);
192  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
193 
194  static std::map<unsigned,unsigned> LoadPciMap = {
195  { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci },
196  { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
197  { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci },
198  { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
199  { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci },
200  { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci },
201  };
202  auto FLC = LoadPciMap.find(IntNo);
203  if (FLC != LoadPciMap.end()) {
204  EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
205  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
206  // Operands: { Base, Increment, Modifier, Chain }
207  auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
208  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
209  MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
210  { IntN->getOperand(2), I, IntN->getOperand(4),
211  IntN->getOperand(0) });
212  return Res;
213  }
214 
215  return nullptr;
216 }
217 
219  SDNode *IntN) {
220  // The "LoadN" is just a machine load instruction. The intrinsic also
221  // involves storing it. Generate an appropriate store to the location
222  // given in the intrinsic's operand(3).
223  uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
224  unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
226  unsigned Size = 1U << (SizeBits-1);
227 
228  SDLoc dl(IntN);
230  SDValue TS;
231  SDValue Loc = IntN->getOperand(3);
232 
233  if (Size >= 4)
234  TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
235  Size);
236  else
237  TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
238  PI, MVT::getIntegerVT(Size * 8), Size);
239 
240  SDNode *StoreN;
241  {
242  HandleSDNode Handle(TS);
243  SelectStore(TS.getNode());
244  StoreN = Handle.getValue().getNode();
245  }
246 
247  // Load's results are { Loaded value, Updated pointer, Chain }
248  ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
249  ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
250  return StoreN;
251 }
252 
254  // The intrinsics for load circ/brev perform two operations:
255  // 1. Load a value V from the specified location, using the addressing
256  // mode corresponding to the intrinsic.
257  // 2. Store V into a specified location. This location is typically a
258  // local, temporary object.
259  // In many cases, the program using these intrinsics will immediately
260  // load V again from the local object. In those cases, when certain
261  // conditions are met, the last load can be removed.
262  // This function identifies and optimizes this pattern. If the pattern
263  // cannot be optimized, it returns nullptr, which will cause the load
264  // to be selected separately from the intrinsic (which will be handled
265  // in SelectIntrinsicWChain).
266 
267  SDValue Ch = N->getOperand(0);
268  SDValue Loc = N->getOperand(1);
269 
270  // Assume that the load and the intrinsic are connected directly with a
271  // chain:
272  // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C
273  // t2: i32,ch = load t1:1, Loc, ...
274  SDNode *C = Ch.getNode();
275 
276  if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
277  return false;
278 
279  // The second load can only be eliminated if its extension type matches
280  // that of the load instruction corresponding to the intrinsic. The user
281  // can provide an address of an unsigned variable to store the result of
282  // a sign-extending intrinsic into (or the other way around).
283  ISD::LoadExtType IntExt;
284  switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
285  case Intrinsic::hexagon_circ_ldub:
286  case Intrinsic::hexagon_circ_lduh:
287  IntExt = ISD::ZEXTLOAD;
288  break;
289  case Intrinsic::hexagon_circ_ldw:
290  case Intrinsic::hexagon_circ_ldd:
291  IntExt = ISD::NON_EXTLOAD;
292  break;
293  default:
294  IntExt = ISD::SEXTLOAD;
295  break;
296  }
297  if (N->getExtensionType() != IntExt)
298  return false;
299 
300  // Make sure the target location for the loaded value in the load intrinsic
301  // is the location from which LD (or N) is loading.
302  if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
303  return false;
304 
307  SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
308  SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
309  ReplaceUses(F, T, array_lengthof(T));
310  // This transformation will leave the intrinsic dead. If it remains in
311  // the DAG, the selection code will see it again, but without the load,
312  // and it will generate a store that is normally required for it.
314  return true;
315  }
316  return false;
317 }
318 
319 // Convert the bit-reverse load intrinsic to appropriate target instruction.
321  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
322  return false;
323 
324  const SDLoc &dl(IntN);
325  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
326 
327  static const std::map<unsigned, unsigned> LoadBrevMap = {
328  { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
329  { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
330  { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
331  { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
332  { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
333  { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
334  };
335  auto FLI = LoadBrevMap.find(IntNo);
336  if (FLI != LoadBrevMap.end()) {
337  EVT ValTy =
338  (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
339  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
340  // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
341  // modifier}.
342  // Operands of target instruction: { Base, Modifier, Chain }.
344  FLI->second, dl, RTys,
345  {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
346 
348  MemOp[0] = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
349  Res->setMemRefs(MemOp, MemOp + 1);
350 
351  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
352  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
353  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
354  CurDAG->RemoveDeadNode(IntN);
355  return true;
356  }
357  return false;
358 }
359 
360 /// Generate a machine instruction node for the new circlar buffer intrinsics.
361 /// The new versions use a CSx register instead of the K field.
363  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
364  return false;
365 
366  SDLoc DL(IntN);
367  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
369 
370  static std::map<unsigned,unsigned> LoadNPcMap = {
371  { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
372  { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
373  { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
374  { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
375  { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
376  { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
377  { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
378  { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
379  { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
380  { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
381  { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
382  { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
383  };
384  auto FLI = LoadNPcMap.find (IntNo);
385  if (FLI != LoadNPcMap.end()) {
386  EVT ValTy = MVT::i32;
387  if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
388  IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
389  ValTy = MVT::i64;
390  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
391  // Handle load.*_pci case which has 6 operands.
392  if (IntN->getNumOperands() == 6) {
393  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
394  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
395  // Operands: { Base, Increment, Modifier, Start, Chain }.
396  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
397  IntN->getOperand(0) };
398  } else
399  // Handle load.*_pcr case which has 5 operands.
400  // Operands: { Base, Modifier, Start, Chain }.
401  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
402  IntN->getOperand(0) };
403  MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
404  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
405  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
406  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
407  CurDAG->RemoveDeadNode(IntN);
408  return true;
409  }
410 
411  static std::map<unsigned,unsigned> StoreNPcMap = {
412  { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
413  { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
414  { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
415  { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
416  { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
417  { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
418  { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
419  { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
420  { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
421  { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
422  };
423  auto FSI = StoreNPcMap.find (IntNo);
424  if (FSI != StoreNPcMap.end()) {
425  EVT RTys[] = { MVT::i32, MVT::Other };
426  // Handle store.*_pci case which has 7 operands.
427  if (IntN->getNumOperands() == 7) {
428  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
429  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
430  // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
431  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
432  IntN->getOperand(6), IntN->getOperand(0) };
433  } else
434  // Handle store.*_pcr case which has 6 operands.
435  // Operands: { Base, Modifier, Value, Start, Chain }.
436  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
437  IntN->getOperand(5), IntN->getOperand(0) };
438  MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
439  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
440  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
441  CurDAG->RemoveDeadNode(IntN);
442  return true;
443  }
444 
445  return false;
446 }
447 
449  SDLoc dl(N);
450  LoadSDNode *LD = cast<LoadSDNode>(N);
451 
452  // Handle indexed loads.
454  if (AM != ISD::UNINDEXED) {
455  SelectIndexedLoad(LD, dl);
456  return;
457  }
458 
459  // Handle patterns using circ/brev load intrinsics.
460  if (tryLoadOfLoadIntrinsic(LD))
461  return;
462 
463  SelectCode(LD);
464 }
465 
467  SDValue Chain = ST->getChain();
468  SDValue Base = ST->getBasePtr();
469  SDValue Offset = ST->getOffset();
470  SDValue Value = ST->getValue();
471  // Get the constant value.
472  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
473  EVT StoredVT = ST->getMemoryVT();
474  EVT ValueVT = Value.getValueType();
475 
476  bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
477  unsigned Opcode = 0;
478 
479  assert(StoredVT.isSimple());
480  switch (StoredVT.getSimpleVT().SimpleTy) {
481  case MVT::i8:
482  Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
483  break;
484  case MVT::i16:
485  Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
486  break;
487  case MVT::i32:
488  case MVT::f32:
489  case MVT::v2i16:
490  case MVT::v4i8:
491  Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
492  break;
493  case MVT::i64:
494  case MVT::f64:
495  case MVT::v2i32:
496  case MVT::v4i16:
497  case MVT::v8i8:
498  Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
499  break;
500  case MVT::v64i8:
501  case MVT::v32i16:
502  case MVT::v16i32:
503  case MVT::v8i64:
504  case MVT::v128i8:
505  case MVT::v64i16:
506  case MVT::v32i32:
507  case MVT::v16i64:
508  if (isAlignedMemNode(ST)) {
509  if (ST->isNonTemporal())
510  Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
511  else
512  Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
513  } else {
514  Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
515  }
516  break;
517  default:
518  llvm_unreachable("Unexpected memory type in indexed store");
519  }
520 
521  if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
522  assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
523  Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
524  dl, MVT::i32, Value);
525  }
526 
527  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
529  MemOp[0] = ST->getMemOperand();
530 
531  // Next address Chain
532  SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
533  SDValue To[2];
534 
535  if (IsValidInc) {
536  // Build post increment store.
537  SDValue Ops[] = { Base, IncV, Value, Chain };
539  Ops);
540  S->setMemRefs(MemOp, MemOp + 1);
541  To[0] = SDValue(S, 0);
542  To[1] = SDValue(S, 1);
543  } else {
544  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
545  SDValue Ops[] = { Base, Zero, Value, Chain };
546  MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
547  S->setMemRefs(MemOp, MemOp + 1);
548  To[1] = SDValue(S, 0);
549  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
550  Base, IncV);
551  To[0] = SDValue(A, 0);
552  }
553 
554  ReplaceUses(From, To, 2);
555  CurDAG->RemoveDeadNode(ST);
556 }
557 
559  SDLoc dl(N);
560  StoreSDNode *ST = cast<StoreSDNode>(N);
561 
562  // Handle indexed stores.
564  if (AM != ISD::UNINDEXED) {
565  SelectIndexedStore(ST, dl);
566  return;
567  }
568 
569  SelectCode(ST);
570 }
571 
573  SDLoc dl(N);
574  SDValue Shl_0 = N->getOperand(0);
575  SDValue Shl_1 = N->getOperand(1);
576 
577  auto Default = [this,N] () -> void { SelectCode(N); };
578 
579  if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
580  return Default();
581 
582  // RHS is const.
583  int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
584 
585  if (Shl_0.getOpcode() == ISD::MUL) {
586  SDValue Mul_0 = Shl_0.getOperand(0); // Val
587  SDValue Mul_1 = Shl_0.getOperand(1); // Const
588  // RHS of mul is const.
589  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
590  int32_t ValConst = C->getSExtValue() << ShlConst;
591  if (isInt<9>(ValConst)) {
592  SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
593  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
594  MVT::i32, Mul_0, Val);
595  ReplaceNode(N, Result);
596  return;
597  }
598  }
599  return Default();
600  }
601 
602  if (Shl_0.getOpcode() == ISD::SUB) {
603  SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
604  SDValue Sub_1 = Shl_0.getOperand(1); // Val
605  if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
606  if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
607  return Default();
608  SDValue Shl2_0 = Sub_1.getOperand(0); // Val
609  SDValue Shl2_1 = Sub_1.getOperand(1); // Const
610  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
611  int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
612  if (isInt<9>(-ValConst)) {
613  SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
614  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
615  MVT::i32, Shl2_0, Val);
616  ReplaceNode(N, Result);
617  return;
618  }
619  }
620  }
621  }
622 
623  return Default();
624 }
625 
626 //
627 // Handling intrinsics for circular load and bitreverse load.
628 //
633  return;
634  }
635 
636  // Handle bit-reverse load intrinsics.
637  if (SelectBrevLdIntrinsic(N))
638  return;
639 
640  if (SelectNewCircIntrinsic(N))
641  return;
642 
643  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
644  if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
645  IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
646  IntNo == Intrinsic::hexagon_V6_vgathermh ||
647  IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
648  IntNo == Intrinsic::hexagon_V6_vgathermhw ||
649  IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
650  SelectV65Gather(N);
651  return;
652  }
653  if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
654  IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
655  IntNo == Intrinsic::hexagon_V6_vgathermhq ||
656  IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
657  IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
658  IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
660  return;
661  }
662 
663  SelectCode(N);
664 }
665 
667  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
668  unsigned Bits;
669  switch (IID) {
670  case Intrinsic::hexagon_S2_vsplatrb:
671  Bits = 8;
672  break;
673  case Intrinsic::hexagon_S2_vsplatrh:
674  Bits = 16;
675  break;
676  case Intrinsic::hexagon_V6_vaddcarry:
677  case Intrinsic::hexagon_V6_vaddcarry_128B:
678  case Intrinsic::hexagon_V6_vsubcarry:
679  case Intrinsic::hexagon_V6_vsubcarry_128B:
681  return;
682  default:
683  SelectCode(N);
684  return;
685  }
686 
687  SDValue V = N->getOperand(1);
688  SDValue U;
689  if (keepsLowBits(V, Bits, U)) {
690  SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
691  N->getOperand(0), U);
692  ReplaceNode(N, R.getNode());
693  SelectCode(R.getNode());
694  return;
695  }
696  SelectCode(N);
697 }
698 
699 //
700 // Map floating point constant values.
701 //
703  SDLoc dl(N);
705  APInt A = CN->getValueAPF().bitcastToAPInt();
706  if (N->getValueType(0) == MVT::f32) {
707  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
708  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
709  return;
710  }
711  if (N->getValueType(0) == MVT::f64) {
712  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
713  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
714  return;
715  }
716 
717  SelectCode(N);
718 }
719 
720 //
721 // Map boolean values.
722 //
724  if (N->getValueType(0) == MVT::i1) {
725  assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1));
726  unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
727  ? Hexagon::PS_true
728  : Hexagon::PS_false;
730  return;
731  }
732 
733  SelectCode(N);
734 }
735 
737  MachineFrameInfo &MFI = MF->getFrameInfo();
738  const HexagonFrameLowering *HFI = HST->getFrameLowering();
739  int FX = cast<FrameIndexSDNode>(N)->getIndex();
740  unsigned StkA = HFI->getStackAlignment();
741  unsigned MaxA = MFI.getMaxAlignment();
743  SDLoc DL(N);
744  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
745  SDNode *R = nullptr;
746 
747  // Use PS_fi when:
748  // - the object is fixed, or
749  // - there are no objects with higher-than-default alignment, or
750  // - there are no dynamically allocated objects.
751  // Otherwise, use PS_fia.
752  if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
753  R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
754  } else {
755  auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
756  unsigned AR = HMFI.getStackAlignBaseVReg();
757  SDValue CH = CurDAG->getEntryNode();
758  SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
759  R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
760  }
761 
762  ReplaceNode(N, R);
763 }
764 
766  unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
767  : Hexagon::A4_subp_c;
768  SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
769  { N->getOperand(0), N->getOperand(1),
770  N->getOperand(2) });
771  ReplaceNode(N, C);
772 }
773 
775  MVT ResTy = N->getValueType(0).getSimpleVT();
776  if (HST->isHVXVectorType(ResTy, true))
777  return SelectHvxVAlign(N);
778 
779  const SDLoc &dl(N);
780  unsigned VecLen = ResTy.getSizeInBits();
781  if (VecLen == 32) {
782  SDValue Ops[] = {
783  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
784  N->getOperand(0),
785  CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
786  N->getOperand(1),
787  CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
788  };
789  SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
790  MVT::i64, Ops);
791 
792  // Shift right by "(Addr & 0x3) * 8" bytes.
793  SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
794  SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
795  SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
796  M0, N->getOperand(2), M1);
797  SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
798  SDValue(R, 0), SDValue(C, 0));
799  SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
800  SDValue(S, 0));
801  ReplaceNode(N, E.getNode());
802  } else {
803  assert(VecLen == 64);
804  SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
805  N->getOperand(2));
806  SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
807  N->getOperand(0), N->getOperand(1),
808  SDValue(Pu,0));
809  ReplaceNode(N, VA);
810  }
811 }
812 
814  const SDLoc &dl(N);
815  SDValue A = N->getOperand(1);
816  int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
817  assert(isPowerOf2_32(-Mask));
818 
819  SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
820  SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
821  N->getOperand(0), M);
822  ReplaceNode(N, AA);
823 }
824 
825 // Handle these nodes here to avoid having to write patterns for all
826 // combinations of input/output types. In all cases, the resulting
827 // instruction is the same.
829  SDValue Op = N->getOperand(0);
830  MVT OpTy = Op.getValueType().getSimpleVT();
831  SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
832  CurDAG->getVTList(OpTy), {Op});
833  ReplaceNode(T, Op.getNode());
834 }
835 
837  MVT ResTy = N->getValueType(0).getSimpleVT();
838  SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
839  N->getOperand(0));
840  ReplaceNode(N, T);
841 }
842 
844  const SDLoc &dl(N);
845  MVT ResTy = N->getValueType(0).getSimpleVT();
846  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
847  SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
848  N->getOperand(0), Zero);
849  ReplaceNode(N, T);
850 }
851 
853  const SDLoc &dl(N);
854  MVT ResTy = N->getValueType(0).getSimpleVT();
855 
857  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
858  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
859  N->getOperand(0), SDValue(R,0));
860  ReplaceNode(N, T);
861 }
862 
864  const SDLoc &dl(N);
865  MVT ResTy = N->getValueType(0).getSimpleVT();
866 
868  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
869  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
870  N->getOperand(0), SDValue(R,0));
871  ReplaceNode(N, T);
872 }
873 
875  if (N->isMachineOpcode())
876  return N->setNodeId(-1); // Already selected.
877 
878  switch (N->getOpcode()) {
879  case ISD::Constant: return SelectConstant(N);
880  case ISD::ConstantFP: return SelectConstantFP(N);
881  case ISD::FrameIndex: return SelectFrameIndex(N);
882  case ISD::SHL: return SelectSHL(N);
883  case ISD::LOAD: return SelectLoad(N);
884  case ISD::STORE: return SelectStore(N);
887 
888  case HexagonISD::ADDC:
889  case HexagonISD::SUBC: return SelectAddSubCarry(N);
890  case HexagonISD::VALIGN: return SelectVAlign(N);
892  case HexagonISD::TYPECAST: return SelectTypecast(N);
893  case HexagonISD::P2D: return SelectP2D(N);
894  case HexagonISD::D2P: return SelectD2P(N);
895  case HexagonISD::Q2V: return SelectQ2V(N);
896  case HexagonISD::V2Q: return SelectV2Q(N);
897  }
898 
899  if (HST->useHVXOps()) {
900  switch (N->getOpcode()) {
901  case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N);
902  case HexagonISD::VROR: return SelectHvxRor(N);
903  }
904  }
905 
906  SelectCode(N);
907 }
908 
910 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
911  std::vector<SDValue> &OutOps) {
912  SDValue Inp = Op, Res;
913 
914  switch (ConstraintID) {
915  default:
916  return true;
918  case InlineAsm::Constraint_o: // Offsetable.
919  case InlineAsm::Constraint_v: // Not offsetable.
920  case InlineAsm::Constraint_m: // Memory.
921  if (SelectAddrFI(Inp, Res))
922  OutOps.push_back(Res);
923  else
924  OutOps.push_back(Inp);
925  break;
926  }
927 
928  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
929  return false;
930 }
931 
932 
933 static bool isMemOPCandidate(SDNode *I, SDNode *U) {
934  // I is an operand of U. Check if U is an arithmetic (binary) operation
935  // usable in a memop, where the other operand is a loaded value, and the
936  // result of U is stored in the same location.
937 
938  if (!U->hasOneUse())
939  return false;
940  unsigned Opc = U->getOpcode();
941  switch (Opc) {
942  case ISD::ADD:
943  case ISD::SUB:
944  case ISD::AND:
945  case ISD::OR:
946  break;
947  default:
948  return false;
949  }
950 
951  SDValue S0 = U->getOperand(0);
952  SDValue S1 = U->getOperand(1);
953  SDValue SY = (S0.getNode() == I) ? S1 : S0;
954 
955  SDNode *UUse = *U->use_begin();
956  if (UUse->getNumValues() != 1)
957  return false;
958 
959  // Check if one of the inputs to U is a load instruction and the output
960  // is used by a store instruction. If so and they also have the same
961  // base pointer, then don't preoprocess this node sequence as it
962  // can be matched to a memop.
963  SDNode *SYNode = SY.getNode();
964  if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
965  SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
966  SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
967  if (LDBasePtr == STBasePtr)
968  return true;
969  }
970  return false;
971 }
972 
973 
974 // Transform: (or (select c x 0) z) -> (select c (or x z) z)
975 // (or (select c 0 y) z) -> (select c z (or y z))
976 void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
977  SelectionDAG &DAG = *CurDAG;
978 
979  for (auto I : Nodes) {
980  if (I->getOpcode() != ISD::OR)
981  continue;
982 
983  auto IsZero = [] (const SDValue &V) -> bool {
984  if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
985  return SC->isNullValue();
986  return false;
987  };
988  auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
989  if (Op.getOpcode() != ISD::SELECT)
990  return false;
991  return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
992  };
993 
994  SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
995  EVT VT = I->getValueType(0);
996  bool SelN0 = IsSelect0(N0);
997  SDValue SOp = SelN0 ? N0 : N1;
998  SDValue VOp = SelN0 ? N1 : N0;
999 
1000  if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
1001  SDValue SC = SOp.getOperand(0);
1002  SDValue SX = SOp.getOperand(1);
1003  SDValue SY = SOp.getOperand(2);
1004  SDLoc DLS = SOp;
1005  if (IsZero(SY)) {
1006  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1007  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1008  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1009  } else if (IsZero(SX)) {
1010  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1011  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1012  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1013  }
1014  }
1015  }
1016 }
1017 
1018 // Transform: (store ch val (add x (add (shl y c) e)))
1019 // to: (store ch val (add x (shl (add y d) c))),
1020 // where e = (shl d c) for some integer d.
1021 // The purpose of this is to enable generation of loads/stores with
1022 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1023 // value c must be 0, 1 or 2.
1024 void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1025  SelectionDAG &DAG = *CurDAG;
1026 
1027  for (auto I : Nodes) {
1028  if (I->getOpcode() != ISD::STORE)
1029  continue;
1030 
1031  // I matched: (store ch val Off)
1032  SDValue Off = I->getOperand(2);
1033  // Off needs to match: (add x (add (shl y c) (shl d c))))
1034  if (Off.getOpcode() != ISD::ADD)
1035  continue;
1036  // Off matched: (add x T0)
1037  SDValue T0 = Off.getOperand(1);
1038  // T0 needs to match: (add T1 T2):
1039  if (T0.getOpcode() != ISD::ADD)
1040  continue;
1041  // T0 matched: (add T1 T2)
1042  SDValue T1 = T0.getOperand(0);
1043  SDValue T2 = T0.getOperand(1);
1044  // T1 needs to match: (shl y c)
1045  if (T1.getOpcode() != ISD::SHL)
1046  continue;
1047  SDValue C = T1.getOperand(1);
1049  if (CN == nullptr)
1050  continue;
1051  unsigned CV = CN->getZExtValue();
1052  if (CV > 2)
1053  continue;
1054  // T2 needs to match e, where e = (shl d c) for some d.
1056  if (EN == nullptr)
1057  continue;
1058  unsigned EV = EN->getZExtValue();
1059  if (EV % (1 << CV) != 0)
1060  continue;
1061  unsigned DV = EV / (1 << CV);
1062 
1063  // Replace T0 with: (shl (add y d) c)
1064  SDLoc DL = SDLoc(I);
1065  EVT VT = T0.getValueType();
1066  SDValue D = DAG.getConstant(DV, DL, VT);
1067  // NewAdd = (add y d)
1068  SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1069  // NewShl = (shl NewAdd c)
1070  SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1071  ReplaceNode(T0.getNode(), NewShl.getNode());
1072  }
1073 }
1074 
1075 // Transform: (load ch (add x (and (srl y c) Mask)))
1076 // to: (load ch (add x (shl (srl y d) d-c)))
1077 // where
1078 // Mask = 00..0 111..1 0.0
1079 // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1080 // | +-------- 1s
1081 // +-------------- at most c 0s
1082 // Motivating example:
1083 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1084 // to (add x (and (srl y 3) 1FFFFFFC))
1085 // which results in a constant-extended and(##...,lsr). This transformation
1086 // undoes this simplification for cases where the shl can be folded into
1087 // an addressing mode.
1088 void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1089  SelectionDAG &DAG = *CurDAG;
1090 
1091  for (SDNode *N : Nodes) {
1092  unsigned Opc = N->getOpcode();
1093  if (Opc != ISD::LOAD && Opc != ISD::STORE)
1094  continue;
1095  SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1096  // Addr must match: (add x T0)
1097  if (Addr.getOpcode() != ISD::ADD)
1098  continue;
1099  SDValue T0 = Addr.getOperand(1);
1100  // T0 must match: (and T1 Mask)
1101  if (T0.getOpcode() != ISD::AND)
1102  continue;
1103 
1104  // We have an AND.
1105  //
1106  // Check the first operand. It must be: (srl y c).
1107  SDValue S = T0.getOperand(0);
1108  if (S.getOpcode() != ISD::SRL)
1109  continue;
1111  if (SN == nullptr)
1112  continue;
1113  if (SN->getAPIntValue().getBitWidth() != 32)
1114  continue;
1115  uint32_t CV = SN->getZExtValue();
1116 
1117  // Check the second operand: the supposed mask.
1119  if (MN == nullptr)
1120  continue;
1121  if (MN->getAPIntValue().getBitWidth() != 32)
1122  continue;
1123  uint32_t Mask = MN->getZExtValue();
1124  // Examine the mask.
1125  uint32_t TZ = countTrailingZeros(Mask);
1126  uint32_t M1 = countTrailingOnes(Mask >> TZ);
1127  uint32_t LZ = countLeadingZeros(Mask);
1128  // Trailing zeros + middle ones + leading zeros must equal the width.
1129  if (TZ + M1 + LZ != 32)
1130  continue;
1131  // The number of trailing zeros will be encoded in the addressing mode.
1132  if (TZ > 2)
1133  continue;
1134  // The number of leading zeros must be at most c.
1135  if (LZ > CV)
1136  continue;
1137 
1138  // All looks good.
1139  SDValue Y = S.getOperand(0);
1140  EVT VT = Addr.getValueType();
1141  SDLoc dl(S);
1142  // TZ = D-C, so D = TZ+C.
1143  SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1144  SDValue DC = DAG.getConstant(TZ, dl, VT);
1145  SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1146  SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1147  ReplaceNode(T0.getNode(), NewShl.getNode());
1148  }
1149 }
1150 
1151 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1152 // (op ... 1 ...))
1153 void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1154  SelectionDAG &DAG = *CurDAG;
1155 
1156  for (SDNode *N : Nodes) {
1157  unsigned Opc = N->getOpcode();
1158  if (Opc != ISD::ZERO_EXTEND)
1159  continue;
1160  SDValue OpI1 = N->getOperand(0);
1161  EVT OpVT = OpI1.getValueType();
1162  if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1163  continue;
1164  for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
1165  SDNode *U = *I;
1166  if (U->getNumValues() != 1)
1167  continue;
1168  EVT UVT = U->getValueType(0);
1169  if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1170  continue;
1171  if (isMemOPCandidate(N, U))
1172  continue;
1173 
1174  // Potentially simplifiable operation.
1175  unsigned I1N = I.getOperandNo();
1177  for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1178  Ops[i] = U->getOperand(i);
1179  EVT BVT = Ops[I1N].getValueType();
1180 
1181  SDLoc dl(U);
1182  SDValue C0 = DAG.getConstant(0, dl, BVT);
1183  SDValue C1 = DAG.getConstant(1, dl, BVT);
1184  SDValue If0, If1;
1185 
1186  if (isa<MachineSDNode>(U)) {
1187  unsigned UseOpc = U->getMachineOpcode();
1188  Ops[I1N] = C0;
1189  If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1190  Ops[I1N] = C1;
1191  If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1192  } else {
1193  unsigned UseOpc = U->getOpcode();
1194  Ops[I1N] = C0;
1195  If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1196  Ops[I1N] = C1;
1197  If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1198  }
1199  SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
1200  DAG.ReplaceAllUsesWith(U, Sel.getNode());
1201  }
1202  }
1203 }
1204 
1206  // Repack all nodes before calling each preprocessing function,
1207  // because each of them can modify the set of nodes.
1208  auto getNodes = [this] () -> std::vector<SDNode*> {
1209  std::vector<SDNode*> T;
1210  T.reserve(CurDAG->allnodes_size());
1211  for (SDNode &N : CurDAG->allnodes())
1212  T.push_back(&N);
1213  return T;
1214  };
1215 
1216  // Transform: (or (select c x 0) z) -> (select c (or x z) z)
1217  // (or (select c 0 y) z) -> (select c z (or y z))
1218  ppSimplifyOrSelect0(getNodes());
1219 
1220  // Transform: (store ch val (add x (add (shl y c) e)))
1221  // to: (store ch val (add x (shl (add y d) c))),
1222  // where e = (shl d c) for some integer d.
1223  // The purpose of this is to enable generation of loads/stores with
1224  // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1225  // value c must be 0, 1 or 2.
1226  ppAddrReorderAddShl(getNodes());
1227 
1228  // Transform: (load ch (add x (and (srl y c) Mask)))
1229  // to: (load ch (add x (shl (srl y d) d-c)))
1230  // where
1231  // Mask = 00..0 111..1 0.0
1232  // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1233  // | +-------- 1s
1234  // +-------------- at most c 0s
1235  // Motivating example:
1236  // DAG combiner optimizes (add x (shl (srl y 5) 2))
1237  // to (add x (and (srl y 3) 1FFFFFFC))
1238  // which results in a constant-extended and(##...,lsr). This transformation
1239  // undoes this simplification for cases where the shl can be folded into
1240  // an addressing mode.
1241  ppAddrRewriteAndSrl(getNodes());
1242 
1243  // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1244  // (op ... 1 ...))
1245  ppHoistZextI1(getNodes());
1246 
1247  DEBUG_WITH_TYPE("isel", {
1248  dbgs() << "Preprocessed (Hexagon) selection DAG:";
1249  CurDAG->dump();
1250  });
1251 
1253  rebalanceAddressTrees();
1254 
1255  DEBUG_WITH_TYPE("isel", {
1256  dbgs() << "Address tree balanced selection DAG:";
1257  CurDAG->dump();
1258  });
1259  }
1260 }
1261 
1263  auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
1264  auto &HFI = *HST.getFrameLowering();
1265  if (!HFI.needsAligna(*MF))
1266  return;
1267 
1268  MachineFrameInfo &MFI = MF->getFrameInfo();
1269  MachineBasicBlock *EntryBB = &MF->front();
1270  unsigned AR = FuncInfo->CreateReg(MVT::i32);
1271  unsigned MaxA = MFI.getMaxAlignment();
1272  BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
1273  .addImm(MaxA);
1274  MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
1275 }
1276 
1277 // Match a frame index that can be used in an addressing mode.
1279  if (N.getOpcode() != ISD::FrameIndex)
1280  return false;
1281  auto &HFI = *HST->getFrameLowering();
1282  MachineFrameInfo &MFI = MF->getFrameInfo();
1283  int FX = cast<FrameIndexSDNode>(N)->getIndex();
1284  if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1285  return false;
1287  return true;
1288 }
1289 
1291  return SelectGlobalAddress(N, R, false, 0);
1292 }
1293 
1295  return SelectGlobalAddress(N, R, true, 0);
1296 }
1297 
1299  return SelectAnyImmediate(N, R, 0);
1300 }
1301 
1303  return SelectAnyImmediate(N, R, 0);
1304 }
1306  return SelectAnyImmediate(N, R, 1);
1307 }
1309  return SelectAnyImmediate(N, R, 2);
1310 }
1312  return SelectAnyImmediate(N, R, 3);
1313 }
1314 
1316  EVT T = N.getValueType();
1317  if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1318  return false;
1319  R = N;
1320  return true;
1321 }
1322 
1324  uint32_t LogAlign) {
1325  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1326  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1327  };
1328 
1329  switch (N.getOpcode()) {
1330  case ISD::Constant: {
1331  if (N.getValueType() != MVT::i32)
1332  return false;
1333  int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1334  if (!IsAligned(V))
1335  return false;
1336  R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1337  return true;
1338  }
1339  case HexagonISD::JT:
1340  case HexagonISD::CP:
1341  // These are assumed to always be aligned at least 8-byte boundary.
1342  if (LogAlign > 3)
1343  return false;
1344  R = N.getOperand(0);
1345  return true;
1346  case ISD::ExternalSymbol:
1347  // Symbols may be aligned at any boundary.
1348  if (LogAlign > 0)
1349  return false;
1350  R = N;
1351  return true;
1352  case ISD::BlockAddress:
1353  // Block address is always aligned at least 4-byte boundary.
1354  if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset()))
1355  return false;
1356  R = N;
1357  return true;
1358  }
1359 
1360  if (SelectGlobalAddress(N, R, false, LogAlign) ||
1361  SelectGlobalAddress(N, R, true, LogAlign))
1362  return true;
1363 
1364  return false;
1365 }
1366 
1368  bool UseGP, uint32_t LogAlign) {
1369  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1370  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1371  };
1372 
1373  switch (N.getOpcode()) {
1374  case ISD::ADD: {
1375  SDValue N0 = N.getOperand(0);
1376  SDValue N1 = N.getOperand(1);
1377  unsigned GAOpc = N0.getOpcode();
1378  if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1379  return false;
1380  if (!UseGP && GAOpc != HexagonISD::CONST32)
1381  return false;
1382  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1383  SDValue Addr = N0.getOperand(0);
1384  // For the purpose of alignment, sextvalue and zextvalue are the same.
1385  if (!IsAligned(Const->getZExtValue()))
1386  return false;
1387  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1388  if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1389  uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1390  R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1391  N.getValueType(), NewOff);
1392  return true;
1393  }
1394  }
1395  }
1396  break;
1397  }
1398  case HexagonISD::CP:
1399  case HexagonISD::JT:
1400  case HexagonISD::CONST32:
1401  // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1402  // want in the instruction.
1403  if (!UseGP)
1404  R = N.getOperand(0);
1405  return !UseGP;
1407  if (UseGP)
1408  R = N.getOperand(0);
1409  return UseGP;
1410  default:
1411  return false;
1412  }
1413 
1414  return false;
1415 }
1416 
1418  // This (complex pattern) function is meant to detect a sign-extension
1419  // i32->i64 on a per-operand basis. This would allow writing single
1420  // patterns that would cover a number of combinations of different ways
1421  // a sign-extensions could be written. For example:
1422  // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1423  // could match either one of these:
1424  // (mul (sext x) (sext_inreg y))
1425  // (mul (sext-load *p) (sext_inreg y))
1426  // (mul (sext_inreg x) (sext y))
1427  // etc.
1428  //
1429  // The returned value will have type i64 and its low word will
1430  // contain the value being extended. The high bits are not specified.
1431  // The returned type is i64 because the original type of N was i64,
1432  // but the users of this function should only use the low-word of the
1433  // result, e.g.
1434  // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1435 
1436  if (N.getValueType() != MVT::i64)
1437  return false;
1438  unsigned Opc = N.getOpcode();
1439  switch (Opc) {
1440  case ISD::SIGN_EXTEND:
1441  case ISD::SIGN_EXTEND_INREG: {
1442  // sext_inreg has the source type as a separate operand.
1443  EVT T = Opc == ISD::SIGN_EXTEND
1444  ? N.getOperand(0).getValueType()
1445  : cast<VTSDNode>(N.getOperand(1))->getVT();
1446  unsigned SW = T.getSizeInBits();
1447  if (SW == 32)
1448  R = N.getOperand(0);
1449  else if (SW < 32)
1450  R = N;
1451  else
1452  return false;
1453  break;
1454  }
1455  case ISD::LOAD: {
1456  LoadSDNode *L = cast<LoadSDNode>(N);
1457  if (L->getExtensionType() != ISD::SEXTLOAD)
1458  return false;
1459  // All extending loads extend to i32, so even if the value in
1460  // memory is shorter than 32 bits, it will be i32 after the load.
1461  if (L->getMemoryVT().getSizeInBits() > 32)
1462  return false;
1463  R = N;
1464  break;
1465  }
1466  case ISD::SRA: {
1467  auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1468  if (!S || S->getZExtValue() != 32)
1469  return false;
1470  R = N;
1471  break;
1472  }
1473  default:
1474  return false;
1475  }
1476  EVT RT = R.getValueType();
1477  if (RT == MVT::i64)
1478  return true;
1479  assert(RT == MVT::i32);
1480  // This is only to produce a value of type i64. Do not rely on the
1481  // high bits produced by this.
1482  const SDLoc &dl(N);
1483  SDValue Ops[] = {
1484  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1485  R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1486  R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1487  };
1488  SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1489  MVT::i64, Ops);
1490  R = SDValue(T, 0);
1491  return true;
1492 }
1493 
1494 bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1495  SDValue &Src) {
1496  unsigned Opc = Val.getOpcode();
1497  switch (Opc) {
1498  case ISD::SIGN_EXTEND:
1499  case ISD::ZERO_EXTEND:
1500  case ISD::ANY_EXTEND: {
1501  const SDValue &Op0 = Val.getOperand(0);
1502  EVT T = Op0.getValueType();
1503  if (T.isInteger() && T.getSizeInBits() == NumBits) {
1504  Src = Op0;
1505  return true;
1506  }
1507  break;
1508  }
1510  case ISD::AssertSext:
1511  case ISD::AssertZext:
1512  if (Val.getOperand(0).getValueType().isInteger()) {
1513  VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1514  if (T->getVT().getSizeInBits() == NumBits) {
1515  Src = Val.getOperand(0);
1516  return true;
1517  }
1518  }
1519  break;
1520  case ISD::AND: {
1521  // Check if this is an AND with NumBits of lower bits set to 1.
1522  uint64_t Mask = (1 << NumBits) - 1;
1523  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1524  if (C->getZExtValue() == Mask) {
1525  Src = Val.getOperand(1);
1526  return true;
1527  }
1528  }
1529  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1530  if (C->getZExtValue() == Mask) {
1531  Src = Val.getOperand(0);
1532  return true;
1533  }
1534  }
1535  break;
1536  }
1537  case ISD::OR:
1538  case ISD::XOR: {
1539  // OR/XOR with the lower NumBits bits set to 0.
1540  uint64_t Mask = (1 << NumBits) - 1;
1541  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1542  if ((C->getZExtValue() & Mask) == 0) {
1543  Src = Val.getOperand(1);
1544  return true;
1545  }
1546  }
1547  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1548  if ((C->getZExtValue() & Mask) == 0) {
1549  Src = Val.getOperand(0);
1550  return true;
1551  }
1552  }
1553  }
1554  default:
1555  break;
1556  }
1557  return false;
1558 }
1559 
1560 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1561  return N->getAlignment() >= N->getMemoryVT().getStoreSize();
1562 }
1563 
1564 bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1565  unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1566  switch (N->getMemoryVT().getStoreSize()) {
1567  case 1:
1568  return StackSize <= 56; // 1*2^6 - 8
1569  case 2:
1570  return StackSize <= 120; // 2*2^6 - 8
1571  case 4:
1572  return StackSize <= 248; // 4*2^6 - 8
1573  default:
1574  return false;
1575  }
1576 }
1577 
1578 // Return true when the given node fits in a positive half word.
1579 bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1580  if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1581  int64_t V = CN->getSExtValue();
1582  return V > 0 && isInt<16>(V);
1583  }
1584  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1585  const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1586  return VN->getVT().getSizeInBits() <= 16;
1587  }
1588  return false;
1589 }
1590 
1591 bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1592  return !CheckSingleUse || N->hasOneUse();
1593 }
1594 
1595 ////////////////////////////////////////////////////////////////////////////////
1596 // Rebalancing of address calculation trees
1597 
1598 static bool isOpcodeHandled(const SDNode *N) {
1599  switch (N->getOpcode()) {
1600  case ISD::ADD:
1601  case ISD::MUL:
1602  return true;
1603  case ISD::SHL:
1604  // We only handle constant shifts because these can be easily flattened
1605  // into multiplications by 2^Op1.
1606  return isa<ConstantSDNode>(N->getOperand(1).getNode());
1607  default:
1608  return false;
1609  }
1610 }
1611 
1612 /// Return the weight of an SDNode
1613 int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1614  if (!isOpcodeHandled(N))
1615  return 1;
1616  assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1617  assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1618  assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1619  return RootWeights[N];
1620 }
1621 
1622 int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1623  if (!isOpcodeHandled(N))
1624  return 0;
1625  assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1626  "Cannot query height of unvisited/RAUW'd node!");
1627  return RootHeights[N];
1628 }
1629 
1630 namespace {
1631 struct WeightedLeaf {
1632  SDValue Value;
1633  int Weight;
1634  int InsertionOrder;
1635 
1636  WeightedLeaf() : Value(SDValue()) { }
1637 
1638  WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1639  Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1640  assert(Weight >= 0 && "Weight must be >= 0");
1641  }
1642 
1643  static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1644  assert(A.Value.getNode() && B.Value.getNode());
1645  return A.Weight == B.Weight ?
1646  (A.InsertionOrder > B.InsertionOrder) :
1647  (A.Weight > B.Weight);
1648  }
1649 };
1650 
1651 /// A specialized priority queue for WeigthedLeaves. It automatically folds
1652 /// constants and allows removal of non-top elements while maintaining the
1653 /// priority order.
1654 class LeafPrioQueue {
1656  bool HaveConst;
1657  WeightedLeaf ConstElt;
1658  unsigned Opcode;
1659 
1660 public:
1661  bool empty() {
1662  return (!HaveConst && Q.empty());
1663  }
1664 
1665  size_t size() {
1666  return Q.size() + HaveConst;
1667  }
1668 
1669  bool hasConst() {
1670  return HaveConst;
1671  }
1672 
1673  const WeightedLeaf &top() {
1674  if (HaveConst)
1675  return ConstElt;
1676  return Q.front();
1677  }
1678 
1679  WeightedLeaf pop() {
1680  if (HaveConst) {
1681  HaveConst = false;
1682  return ConstElt;
1683  }
1684  std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1685  return Q.pop_back_val();
1686  }
1687 
1688  void push(WeightedLeaf L, bool SeparateConst=true) {
1689  if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1690  if (Opcode == ISD::MUL &&
1691  cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1692  return;
1693  if (Opcode == ISD::ADD &&
1694  cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1695  return;
1696 
1697  HaveConst = true;
1698  ConstElt = L;
1699  } else {
1700  Q.push_back(L);
1701  std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1702  }
1703  }
1704 
1705  /// Push L to the bottom of the queue regardless of its weight. If L is
1706  /// constant, it will not be folded with other constants in the queue.
1707  void pushToBottom(WeightedLeaf L) {
1708  L.Weight = 1000;
1709  push(L, false);
1710  }
1711 
1712  /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1713  /// lowest weight and remove it from the queue.
1714  WeightedLeaf findSHL(uint64_t MaxAmount);
1715 
1716  WeightedLeaf findMULbyConst();
1717 
1718  LeafPrioQueue(unsigned Opcode) :
1719  HaveConst(false), Opcode(Opcode) { }
1720 };
1721 } // end anonymous namespace
1722 
1723 WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1724  int ResultPos;
1725  WeightedLeaf Result;
1726 
1727  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1728  const WeightedLeaf &L = Q[Pos];
1729  const SDValue &Val = L.Value;
1730  if (Val.getOpcode() != ISD::SHL ||
1731  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1732  Val.getConstantOperandVal(1) > MaxAmount)
1733  continue;
1734  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1735  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1736  {
1737  Result = L;
1738  ResultPos = Pos;
1739  }
1740  }
1741 
1742  if (Result.Value.getNode()) {
1743  Q.erase(&Q[ResultPos]);
1744  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1745  }
1746 
1747  return Result;
1748 }
1749 
1750 WeightedLeaf LeafPrioQueue::findMULbyConst() {
1751  int ResultPos;
1752  WeightedLeaf Result;
1753 
1754  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1755  const WeightedLeaf &L = Q[Pos];
1756  const SDValue &Val = L.Value;
1757  if (Val.getOpcode() != ISD::MUL ||
1758  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1759  Val.getConstantOperandVal(1) > 127)
1760  continue;
1761  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1762  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1763  {
1764  Result = L;
1765  ResultPos = Pos;
1766  }
1767  }
1768 
1769  if (Result.Value.getNode()) {
1770  Q.erase(&Q[ResultPos]);
1771  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1772  }
1773 
1774  return Result;
1775 }
1776 
1777 SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1778  uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1779  return CurDAG->getConstant(MulFactor, SDLoc(N),
1780  N->getOperand(1).getValueType());
1781 }
1782 
1783 /// @returns the value x for which 2^x is a factor of Val
1784 static unsigned getPowerOf2Factor(SDValue Val) {
1785  if (Val.getOpcode() == ISD::MUL) {
1786  unsigned MaxFactor = 0;
1787  for (int i = 0; i < 2; ++i) {
1789  if (!C)
1790  continue;
1791  const APInt &CInt = C->getAPIntValue();
1792  if (CInt.getBoolValue())
1793  MaxFactor = CInt.countTrailingZeros();
1794  }
1795  return MaxFactor;
1796  }
1797  if (Val.getOpcode() == ISD::SHL) {
1798  if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1799  return 0;
1800  return (unsigned) Val.getConstantOperandVal(1);
1801  }
1802 
1803  return 0;
1804 }
1805 
1806 /// @returns true if V>>Amount will eliminate V's operation on its child
1807 static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1808  if (V.getOpcode() == ISD::MUL) {
1809  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1810  for (int i = 0; i < 2; ++i)
1811  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1812  V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1813  uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1814  return (NewConst == 1);
1815  }
1816  } else if (V.getOpcode() == ISD::SHL) {
1817  return (Amount == V.getConstantOperandVal(1));
1818  }
1819 
1820  return false;
1821 }
1822 
1823 SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1824  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1825  if (V.getOpcode() == ISD::MUL) {
1826  for (int i=0; i < 2; ++i) {
1827  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1828  V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
1829  uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
1830  if (NewConst == 1)
1831  return Ops[!i];
1832  Ops[i] = CurDAG->getConstant(NewConst,
1833  SDLoc(V), V.getValueType());
1834  break;
1835  }
1836  }
1837  } else if (V.getOpcode() == ISD::SHL) {
1838  uint64_t ShiftAmount = V.getConstantOperandVal(1);
1839  if (ShiftAmount == Power)
1840  return Ops[0];
1841  Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
1842  SDLoc(V), V.getValueType());
1843  }
1844 
1845  return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
1846 }
1847 
1848 static bool isTargetConstant(const SDValue &V) {
1849  return V.getOpcode() == HexagonISD::CONST32 ||
1851 }
1852 
1853 unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
1854  if (GAUsesInFunction.count(V))
1855  return GAUsesInFunction[V];
1856 
1857  unsigned Result = 0;
1858  const Function &CurF = CurDAG->getMachineFunction().getFunction();
1859  for (const User *U : V->users()) {
1860  if (isa<Instruction>(U) &&
1861  cast<Instruction>(U)->getParent()->getParent() == &CurF)
1862  ++Result;
1863  }
1864 
1865  GAUsesInFunction[V] = Result;
1866 
1867  return Result;
1868 }
1869 
1870 /// Note - After calling this, N may be dead. It may have been replaced by a
1871 /// new node, so always use the returned value in place of N.
1872 ///
1873 /// @returns The SDValue taking the place of N (which could be N if it is
1874 /// unchanged)
1875 SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
1876  assert(RootWeights.count(N) && "Cannot balance non-root node.");
1877  assert(RootWeights[N] != -2 && "This node was RAUW'd!");
1878  assert(!TopLevel || N->getOpcode() == ISD::ADD);
1879 
1880  // Return early if this node was already visited
1881  if (RootWeights[N] != -1)
1882  return SDValue(N, 0);
1883 
1884  assert(isOpcodeHandled(N));
1885 
1886  SDValue Op0 = N->getOperand(0);
1887  SDValue Op1 = N->getOperand(1);
1888 
1889  // Return early if the operands will remain unchanged or are all roots
1890  if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
1891  (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
1892  SDNode *Op0N = Op0.getNode();
1893  int Weight;
1894  if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
1895  Weight = getWeight(balanceSubTree(Op0N).getNode());
1896  // Weight = calculateWeight(Op0N);
1897  } else
1898  Weight = getWeight(Op0N);
1899 
1900  SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
1901  if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
1902  Weight += getWeight(balanceSubTree(Op1N).getNode());
1903  // Weight += calculateWeight(Op1N);
1904  } else
1905  Weight += getWeight(Op1N);
1906 
1907  RootWeights[N] = Weight;
1908  RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
1909  getHeight(N->getOperand(1).getNode())) + 1;
1910 
1911  LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
1912  << " Height=" << RootHeights[N] << "): ");
1913  LLVM_DEBUG(N->dump(CurDAG));
1914 
1915  return SDValue(N, 0);
1916  }
1917 
1918  LLVM_DEBUG(dbgs() << "** Balancing root node: ");
1919  LLVM_DEBUG(N->dump(CurDAG));
1920 
1921  unsigned NOpcode = N->getOpcode();
1922 
1923  LeafPrioQueue Leaves(NOpcode);
1924  SmallVector<SDValue, 4> Worklist;
1925  Worklist.push_back(SDValue(N, 0));
1926 
1927  // SHL nodes will be converted to MUL nodes
1928  if (NOpcode == ISD::SHL)
1929  NOpcode = ISD::MUL;
1930 
1931  bool CanFactorize = false;
1932  WeightedLeaf Mul1, Mul2;
1933  unsigned MaxPowerOf2 = 0;
1934  WeightedLeaf GA;
1935 
1936  // Do not try to factor out a shift if there is already a shift at the tip of
1937  // the tree.
1938  bool HaveTopLevelShift = false;
1939  if (TopLevel &&
1940  ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
1941  Op0.getConstantOperandVal(1) < 4) ||
1942  (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
1943  Op1.getConstantOperandVal(1) < 4)))
1944  HaveTopLevelShift = true;
1945 
1946  // Flatten the subtree into an ordered list of leaves; at the same time
1947  // determine whether the tree is already balanced.
1948  int InsertionOrder = 0;
1949  SmallDenseMap<SDValue, int> NodeHeights;
1950  bool Imbalanced = false;
1951  int CurrentWeight = 0;
1952  while (!Worklist.empty()) {
1953  SDValue Child = Worklist.pop_back_val();
1954 
1955  if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
1956  // CASE 1: Child is a root note
1957 
1958  int Weight = RootWeights[Child.getNode()];
1959  if (Weight == -1) {
1960  Child = balanceSubTree(Child.getNode());
1961  // calculateWeight(Child.getNode());
1962  Weight = getWeight(Child.getNode());
1963  } else if (Weight == -2) {
1964  // Whoops, this node was RAUWd by one of the balanceSubTree calls we
1965  // made. Our worklist isn't up to date anymore.
1966  // Restart the whole process.
1967  LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
1968  return balanceSubTree(N, TopLevel);
1969  }
1970 
1971  NodeHeights[Child] = 1;
1972  CurrentWeight += Weight;
1973 
1974  unsigned PowerOf2;
1975  if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
1976  (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
1977  Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
1978  // Try to identify two factorizable MUL/SHL children greedily. Leave
1979  // them out of the priority queue for now so we can deal with them
1980  // after.
1981  if (!Mul1.Value.getNode()) {
1982  Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
1983  MaxPowerOf2 = PowerOf2;
1984  } else {
1985  Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
1986  MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
1987 
1988  // Our addressing modes can only shift by a maximum of 3
1989  if (MaxPowerOf2 > 3)
1990  MaxPowerOf2 = 3;
1991 
1992  CanFactorize = true;
1993  }
1994  } else
1995  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
1996  } else if (!isOpcodeHandled(Child.getNode())) {
1997  // CASE 2: Child is an unhandled kind of node (e.g. constant)
1998  int Weight = getWeight(Child.getNode());
1999 
2000  NodeHeights[Child] = getHeight(Child.getNode());
2001  CurrentWeight += Weight;
2002 
2003  if (isTargetConstant(Child) && !GA.Value.getNode())
2004  GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2005  else
2006  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2007  } else {
2008  // CASE 3: Child is a subtree of same opcode
2009  // Visit children first, then flatten.
2010  unsigned ChildOpcode = Child.getOpcode();
2011  assert(ChildOpcode == NOpcode ||
2012  (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2013 
2014  // Convert SHL to MUL
2015  SDValue Op1;
2016  if (ChildOpcode == ISD::SHL)
2017  Op1 = getMultiplierForSHL(Child.getNode());
2018  else
2019  Op1 = Child->getOperand(1);
2020 
2021  if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2022  assert(!NodeHeights.count(Child) && "Parent visited before children?");
2023  // Visit children first, then re-visit this node
2024  Worklist.push_back(Child);
2025  Worklist.push_back(Op1);
2026  Worklist.push_back(Child->getOperand(0));
2027  } else {
2028  // Back at this node after visiting the children
2029  if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2030  Imbalanced = true;
2031 
2032  NodeHeights[Child] = std::max(NodeHeights[Op1],
2033  NodeHeights[Child->getOperand(0)]) + 1;
2034  }
2035  }
2036  }
2037 
2038  LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2039  << " weight=" << CurrentWeight
2040  << " imbalanced=" << Imbalanced << "\n");
2041 
2042  // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2043  // This factors out a shift in order to match memw(a<<Y+b).
2044  if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2045  willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2046  LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2047  int Weight = Mul1.Weight + Mul2.Weight;
2048  int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2049  SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2050  SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2051  SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2052  Mul1Factored, Mul2Factored);
2053  SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2054  Mul1.Value.getValueType());
2055  SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2056  Sum, Const);
2057  NodeHeights[New] = Height;
2058  Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2059  } else if (Mul1.Value.getNode()) {
2060  // We failed to factorize two MULs, so now the Muls are left outside the
2061  // queue... add them back.
2062  Leaves.push(Mul1);
2063  if (Mul2.Value.getNode())
2064  Leaves.push(Mul2);
2065  CanFactorize = false;
2066  }
2067 
2068  // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2069  // and the root node itself is not used more than twice. This reduces the
2070  // amount of additional constant extenders introduced by this optimization.
2071  bool CombinedGA = false;
2072  if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2073  GA.Value.hasOneUse() && N->use_size() < 3) {
2074  GlobalAddressSDNode *GANode =
2075  cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2076  ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2077 
2078  if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2080  LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
2081  << Offset->getSExtValue() << "): ");
2082  LLVM_DEBUG(GANode->dump(CurDAG));
2083 
2084  SDValue NewTGA =
2085  CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2086  GANode->getValueType(0),
2087  GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2088  GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2089  GA.Value.getValueType(), NewTGA);
2090  GA.Weight += Leaves.top().Weight;
2091 
2092  NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2093  CombinedGA = true;
2094 
2095  Leaves.pop(); // Remove the offset constant from the queue
2096  }
2097  }
2098 
2099  if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2100  (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2101  RootWeights[N] = CurrentWeight;
2102  RootHeights[N] = NodeHeights[SDValue(N, 0)];
2103 
2104  return SDValue(N, 0);
2105  }
2106 
2107  // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2108  if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2109  WeightedLeaf SHL = Leaves.findSHL(31);
2110  if (SHL.Value.getNode()) {
2111  int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2112  GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2113  GA.Value.getValueType(),
2114  GA.Value, SHL.Value);
2115  GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2116  NodeHeights[GA.Value] = Height;
2117  }
2118  }
2119 
2120  if (GA.Value.getNode())
2121  Leaves.push(GA);
2122 
2123  // If this is the top level and we haven't factored out a shift, we should try
2124  // to move a constant to the bottom to match addressing modes like memw(rX+C)
2125  if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2126  LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2127  Leaves.pushToBottom(Leaves.pop());
2128  }
2129 
2130  const DataLayout &DL = CurDAG->getDataLayout();
2131  const TargetLowering &TLI = *getTargetLowering();
2132 
2133  // Rebuild the tree using Huffman's algorithm
2134  while (Leaves.size() > 1) {
2135  WeightedLeaf L0 = Leaves.pop();
2136 
2137  // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2138  // otherwise just get the next leaf
2139  WeightedLeaf L1 = Leaves.findMULbyConst();
2140  if (!L1.Value.getNode())
2141  L1 = Leaves.pop();
2142 
2143  assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2144 
2145  SDValue V0 = L0.Value;
2146  int V0Weight = L0.Weight;
2147  SDValue V1 = L1.Value;
2148  int V1Weight = L1.Weight;
2149 
2150  // Make sure that none of these nodes have been RAUW'd
2151  if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2152  (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2153  LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2154  return balanceSubTree(N, TopLevel);
2155  }
2156 
2159  EVT VT = N->getValueType(0);
2160  SDValue NewNode;
2161 
2162  if (V0C && !V1C) {
2163  std::swap(V0, V1);
2164  std::swap(V0C, V1C);
2165  }
2166 
2167  // Calculate height of this node
2168  assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2169  "Children must have been visited before re-combining them!");
2170  int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2171 
2172  // Rebuild this node (and restore SHL from MUL if needed)
2173  if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2174  NewNode = CurDAG->getNode(
2175  ISD::SHL, SDLoc(V0), VT, V0,
2177  V1C->getAPIntValue().logBase2(), SDLoc(N),
2178  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2179  else
2180  NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2181 
2182  NodeHeights[NewNode] = Height;
2183 
2184  int Weight = V0Weight + V1Weight;
2185  Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2186 
2187  LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
2188  << ",Height=" << Height << "):\n");
2189  LLVM_DEBUG(NewNode.dump());
2190  }
2191 
2192  assert(Leaves.size() == 1);
2193  SDValue NewRoot = Leaves.top().Value;
2194 
2195  assert(NodeHeights.count(NewRoot));
2196  int Height = NodeHeights[NewRoot];
2197 
2198  // Restore SHL if we earlier converted it to a MUL
2199  if (NewRoot.getOpcode() == ISD::MUL) {
2200  ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2201  if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2202  EVT VT = NewRoot.getValueType();
2203  SDValue V0 = NewRoot.getOperand(0);
2204  NewRoot = CurDAG->getNode(
2205  ISD::SHL, SDLoc(NewRoot), VT, V0,
2207  V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2208  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2209  }
2210  }
2211 
2212  if (N != NewRoot.getNode()) {
2213  LLVM_DEBUG(dbgs() << "--> Root is now: ");
2214  LLVM_DEBUG(NewRoot.dump());
2215 
2216  // Replace all uses of old root by new root
2217  CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2218  // Mark that we have RAUW'd N
2219  RootWeights[N] = -2;
2220  } else {
2221  LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
2222  }
2223 
2224  RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2225  RootHeights[NewRoot.getNode()] = Height;
2226 
2227  return NewRoot;
2228 }
2229 
2230 void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2231  for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) {
2232  SDNode *N = &*I++;
2233  if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2234  continue;
2235 
2236  SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2237  if (BasePtr.getOpcode() != ISD::ADD)
2238  continue;
2239 
2240  // We've already processed this node
2241  if (RootWeights.count(BasePtr.getNode()))
2242  continue;
2243 
2244  LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2245  LLVM_DEBUG(N->dump(CurDAG));
2246 
2247  // FindRoots
2248  SmallVector<SDNode *, 4> Worklist;
2249 
2250  Worklist.push_back(BasePtr.getOperand(0).getNode());
2251  Worklist.push_back(BasePtr.getOperand(1).getNode());
2252 
2253  while (!Worklist.empty()) {
2254  SDNode *N = Worklist.pop_back_val();
2255  unsigned Opcode = N->getOpcode();
2256 
2257  if (!isOpcodeHandled(N))
2258  continue;
2259 
2260  Worklist.push_back(N->getOperand(0).getNode());
2261  Worklist.push_back(N->getOperand(1).getNode());
2262 
2263  // Not a root if it has only one use and same opcode as its parent
2264  if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
2265  continue;
2266 
2267  // This root node has already been processed
2268  if (RootWeights.count(N))
2269  continue;
2270 
2271  RootWeights[N] = -1;
2272  }
2273 
2274  // Balance node itself
2275  RootWeights[BasePtr.getNode()] = -1;
2276  SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2277 
2278  if (N->getOpcode() == ISD::LOAD)
2279  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2280  NewBasePtr, N->getOperand(2));
2281  else
2282  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2283  NewBasePtr, N->getOperand(3));
2284 
2285  LLVM_DEBUG(dbgs() << "--> Final node: ");
2286  LLVM_DEBUG(N->dump(CurDAG));
2287  }
2288 
2290  GAUsesInFunction.clear();
2291  RootHeights.clear();
2292  RootWeights.clear();
2293 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
uint64_t CallInst * C
static MVT getIntegerVT(unsigned BitWidth)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOffset() const
const GlobalValue * getGlobal() const
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static cl::opt< bool > CheckSingleUse("hexagon-isel-su", cl::Hidden, cl::init(true), cl::desc("Enable checking of SDNode's single-use status"))
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
static bool isMemOPCandidate(SDNode *I, SDNode *U)
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd)
Assign this MachineSDNodes&#39;s memory reference descriptor list.
static bool willShiftRightEliminate(SDValue V, unsigned Amount)
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:343
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
const SDValue & getBasePtr() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool DetectUseSxtw(SDValue &N, SDValue &R)
const SDValue & getValue() const
SDVTList getVTList() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
bool SelectAddrFI(SDValue &N, SDValue &R)
const SDValue & getChain() const
static unsigned getPowerOf2Factor(SDValue Val)
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:131
unsigned getAlignment() const
Hexagon target-specific information for each MachineFunction.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:306
A debug info location.
Definition: DebugLoc.h:34
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:685
const HexagonFrameLowering * getFrameLowering() const override
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
MachineFunction * MF
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1502
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:159
bool isTruncatingStore() const
Return true if the op does a truncation before store.
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:65
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1625
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
static ManagedStatic< DebugCounter > DC
static cl::opt< bool > RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if this allows optimizations"))
Shift and rotation operations.
Definition: ISDOpcodes.h:380
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:478
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
const TargetLowering * TLI
static bool isTargetConstant(const SDValue &V)
uint64_t getConstantOperandVal(unsigned i) const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl)
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:457
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:398
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT&#39;s, which are used to parameterize some operations.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:446
unsigned getSizeInBits() const
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:477
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:395
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:623
#define T
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:571
bool SelectNewCircIntrinsic(SDNode *IntN)
Generate a machine instruction node for the new circlar buffer intrinsics.
static cl::opt< bool > EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), cl::desc("Rebalance address calculation trees to improve " "instruction selection"))
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
bool SelectBrevLdIntrinsic(SDNode *IntN)
SDNode * StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN)
This class is used to represent ISD::STORE nodes.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:120
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool SelectAnyImm0(SDValue &N, SDValue &R)
bool SelectAnyInt(SDValue &N, SDValue &R)
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:124
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:886
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions...
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
const SDValue & getOffset() const
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool isValidAutoIncImm(const EVT VT, const int Offset) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using &#39;From&#39; to use &#39;To&#39; instead.
const APInt & getAPIntValue() const
bool SelectAnyImm2(SDValue &N, SDValue &R)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:285
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Extended Value Type.
Definition: ValueTypes.h:34
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl)
const MachineBasicBlock & front() const
size_t size() const
Definition: SmallVector.h:53
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isOpcodeHandled(const SDNode *N)
void dump() const
Dump this node, for debugging.
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
bool SelectAddrGA(SDValue &N, SDValue &R)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1045
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:442
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, uint32_t LogAlign)
unsigned estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
void dump() const
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:800
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const
EVT is not used in-tree, but is used by out-of-tree target.
An SDNode that represents everything that will be needed to construct a MachineInstr.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:376
CHAIN = SC CHAIN, Imm128 - System call.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Represents one node in the SelectionDAG.
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:434
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool SelectAnyImmediate(SDValue &N, SDValue &R, uint32_t LogAlign)
unsigned logBase2() const
Definition: APInt.h:1741
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
size_t use_size() const
Return the number of uses of this node.
EVT getMemoryVT() const
Return the type of the in-memory value.
Class for arbitrary precision integers.
Definition: APInt.h:69
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:390
bool isPowerOf2() const
Check if this APInt&#39;s value is a power of two greater than zero.
Definition: APInt.h:463
iterator_range< user_iterator > users()
Definition: Value.h:399
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:438
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:441
static cl::opt< bool > RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"))
bool SelectAnyImm3(SDValue &N, SDValue &R)
bool SelectAnyImm1(SDValue &N, SDValue &R)
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:435
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:128
const SDValue & getValue() const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:363
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:700
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
bool SelectAddrGP(SDValue &N, SDValue &R)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:456
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:575
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool tryLoadOfLoadIntrinsic(LoadSDNode *N)
FunctionPass * createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel)
unsigned getOpcode() const
unsigned CreateReg(MVT VT)
CreateReg - Allocate a single virtual register for the given type.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:141
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
const TargetLowering * getTargetLowering() const
bool SelectAnyImm(SDValue &N, SDValue &R)
This class is used to form a handle around another node that is persistent and is updated across invo...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:73
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
static const Function * getParent(const Value *V)
bool isNonTemporal() const
const APFloat & getValueAPF() const
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
APInt bitcastToAPInt() const
Definition: APFloat.h:1094
Conversion operators.
Definition: ISDOpcodes.h:435
const SDValue & getOperand(unsigned i) const
uint64_t getZExtValue() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
MachineSDNode * LoadInstrForLoadIntrinsic(SDNode *IntN)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
#define LLVM_DEBUG(X)
Definition: Debug.h:119
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num)
allocateMemRefsArray - Allocate an array to hold MachineMemOperand pointers.
FunctionLoweringInfo * FuncInfo
#define T1
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:617
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:866
This class is used to represent ISD::LOAD nodes.