LLVM  9.0.0svn
HexagonISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the Hexagon target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "Hexagon.h"
14 #include "HexagonISelDAGToDAG.h"
15 #include "HexagonISelLowering.h"
17 #include "HexagonTargetMachine.h"
21 #include "llvm/IR/Intrinsics.h"
23 #include "llvm/Support/Debug.h"
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "hexagon-isel"
27 
28 static
30 EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
31  cl::desc("Rebalance address calculation trees to improve "
32  "instruction selection"));
33 
34 // Rebalance only if this allows e.g. combining a GA with an offset or
35 // factoring out a shift.
36 static
38 RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
39  cl::desc("Rebalance address tree only if this allows optimizations"));
40 
41 static
43 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
44  cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
45 
46 static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
47  cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
48 
49 //===----------------------------------------------------------------------===//
50 // Instruction Selector Implementation
51 //===----------------------------------------------------------------------===//
52 
53 #define GET_DAGISEL_BODY HexagonDAGToDAGISel
54 #include "HexagonGenDAGISel.inc"
55 
56 /// createHexagonISelDag - This pass converts a legalized DAG into a
57 /// Hexagon-specific DAG, ready for instruction scheduling.
58 ///
59 namespace llvm {
61  CodeGenOpt::Level OptLevel) {
62  return new HexagonDAGToDAGISel(TM, OptLevel);
63 }
64 }
65 
67  SDValue Chain = LD->getChain();
68  SDValue Base = LD->getBasePtr();
69  SDValue Offset = LD->getOffset();
70  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
71  EVT LoadedVT = LD->getMemoryVT();
72  unsigned Opcode = 0;
73 
74  // Check for zero extended loads. Treat any-extend loads as zero extended
75  // loads.
77  bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
78  bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
79 
80  assert(LoadedVT.isSimple());
81  switch (LoadedVT.getSimpleVT().SimpleTy) {
82  case MVT::i8:
83  if (IsZeroExt)
84  Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
85  else
86  Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
87  break;
88  case MVT::i16:
89  if (IsZeroExt)
90  Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
91  else
92  Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
93  break;
94  case MVT::i32:
95  case MVT::f32:
96  case MVT::v2i16:
97  case MVT::v4i8:
98  Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
99  break;
100  case MVT::i64:
101  case MVT::f64:
102  case MVT::v2i32:
103  case MVT::v4i16:
104  case MVT::v8i8:
105  Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
106  break;
107  case MVT::v64i8:
108  case MVT::v32i16:
109  case MVT::v16i32:
110  case MVT::v8i64:
111  case MVT::v128i8:
112  case MVT::v64i16:
113  case MVT::v32i32:
114  case MVT::v16i64:
115  if (isAlignedMemNode(LD)) {
116  if (LD->isNonTemporal())
117  Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
118  else
119  Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
120  } else {
121  Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
122  }
123  break;
124  default:
125  llvm_unreachable("Unexpected memory type in indexed load");
126  }
127 
128  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
129  MachineMemOperand *MemOp = LD->getMemOperand();
130 
131  auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
132  -> MachineSDNode* {
133  if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
134  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
135  return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
136  Zero, SDValue(N, 0));
137  }
138  if (ExtType == ISD::SEXTLOAD)
139  return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
140  SDValue(N, 0));
141  return N;
142  };
143 
144  // Loaded value Next address Chain
145  SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
146  SDValue To[3];
147 
148  EVT ValueVT = LD->getValueType(0);
149  if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
150  // A load extending to i64 will actually produce i32, which will then
151  // need to be extended to i64.
152  assert(LoadedVT.getSizeInBits() <= 32);
153  ValueVT = MVT::i32;
154  }
155 
156  if (IsValidInc) {
157  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
158  MVT::i32, MVT::Other, Base,
159  IncV, Chain);
160  CurDAG->setNodeMemRefs(L, {MemOp});
161  To[1] = SDValue(L, 1); // Next address.
162  To[2] = SDValue(L, 2); // Chain.
163  // Handle special case for extension to i64.
164  if (LD->getValueType(0) == MVT::i64)
165  L = getExt64(L, dl);
166  To[0] = SDValue(L, 0); // Loaded (extended) value.
167  } else {
168  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
169  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
170  Base, Zero, Chain);
171  CurDAG->setNodeMemRefs(L, {MemOp});
172  To[2] = SDValue(L, 1); // Chain.
173  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
174  Base, IncV);
175  To[1] = SDValue(A, 0); // Next address.
176  // Handle special case for extension to i64.
177  if (LD->getValueType(0) == MVT::i64)
178  L = getExt64(L, dl);
179  To[0] = SDValue(L, 0); // Loaded (extended) value.
180  }
181  ReplaceUses(From, To, 3);
182  CurDAG->RemoveDeadNode(LD);
183 }
184 
186  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
187  return nullptr;
188 
189  SDLoc dl(IntN);
190  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
191 
192  static std::map<unsigned,unsigned> LoadPciMap = {
193  { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci },
194  { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
195  { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci },
196  { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
197  { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci },
198  { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci },
199  };
200  auto FLC = LoadPciMap.find(IntNo);
201  if (FLC != LoadPciMap.end()) {
202  EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
203  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
204  // Operands: { Base, Increment, Modifier, Chain }
205  auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
206  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
207  MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
208  { IntN->getOperand(2), I, IntN->getOperand(4),
209  IntN->getOperand(0) });
210  return Res;
211  }
212 
213  return nullptr;
214 }
215 
217  SDNode *IntN) {
218  // The "LoadN" is just a machine load instruction. The intrinsic also
219  // involves storing it. Generate an appropriate store to the location
220  // given in the intrinsic's operand(3).
221  uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
222  unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
224  unsigned Size = 1U << (SizeBits-1);
225 
226  SDLoc dl(IntN);
228  SDValue TS;
229  SDValue Loc = IntN->getOperand(3);
230 
231  if (Size >= 4)
232  TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
233  Size);
234  else
235  TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
236  PI, MVT::getIntegerVT(Size * 8), Size);
237 
238  SDNode *StoreN;
239  {
240  HandleSDNode Handle(TS);
241  SelectStore(TS.getNode());
242  StoreN = Handle.getValue().getNode();
243  }
244 
245  // Load's results are { Loaded value, Updated pointer, Chain }
246  ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
247  ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
248  return StoreN;
249 }
250 
252  // The intrinsics for load circ/brev perform two operations:
253  // 1. Load a value V from the specified location, using the addressing
254  // mode corresponding to the intrinsic.
255  // 2. Store V into a specified location. This location is typically a
256  // local, temporary object.
257  // In many cases, the program using these intrinsics will immediately
258  // load V again from the local object. In those cases, when certain
259  // conditions are met, the last load can be removed.
260  // This function identifies and optimizes this pattern. If the pattern
261  // cannot be optimized, it returns nullptr, which will cause the load
262  // to be selected separately from the intrinsic (which will be handled
263  // in SelectIntrinsicWChain).
264 
265  SDValue Ch = N->getOperand(0);
266  SDValue Loc = N->getOperand(1);
267 
268  // Assume that the load and the intrinsic are connected directly with a
269  // chain:
270  // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C
271  // t2: i32,ch = load t1:1, Loc, ...
272  SDNode *C = Ch.getNode();
273 
274  if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
275  return false;
276 
277  // The second load can only be eliminated if its extension type matches
278  // that of the load instruction corresponding to the intrinsic. The user
279  // can provide an address of an unsigned variable to store the result of
280  // a sign-extending intrinsic into (or the other way around).
281  ISD::LoadExtType IntExt;
282  switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
283  case Intrinsic::hexagon_circ_ldub:
284  case Intrinsic::hexagon_circ_lduh:
285  IntExt = ISD::ZEXTLOAD;
286  break;
287  case Intrinsic::hexagon_circ_ldw:
288  case Intrinsic::hexagon_circ_ldd:
289  IntExt = ISD::NON_EXTLOAD;
290  break;
291  default:
292  IntExt = ISD::SEXTLOAD;
293  break;
294  }
295  if (N->getExtensionType() != IntExt)
296  return false;
297 
298  // Make sure the target location for the loaded value in the load intrinsic
299  // is the location from which LD (or N) is loading.
300  if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
301  return false;
302 
305  SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
306  SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
307  ReplaceUses(F, T, array_lengthof(T));
308  // This transformation will leave the intrinsic dead. If it remains in
309  // the DAG, the selection code will see it again, but without the load,
310  // and it will generate a store that is normally required for it.
312  return true;
313  }
314  return false;
315 }
316 
317 // Convert the bit-reverse load intrinsic to appropriate target instruction.
319  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
320  return false;
321 
322  const SDLoc &dl(IntN);
323  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
324 
325  static const std::map<unsigned, unsigned> LoadBrevMap = {
326  { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
327  { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
328  { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
329  { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
330  { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
331  { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
332  };
333  auto FLI = LoadBrevMap.find(IntNo);
334  if (FLI != LoadBrevMap.end()) {
335  EVT ValTy =
336  (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
337  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
338  // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
339  // modifier}.
340  // Operands of target instruction: { Base, Modifier, Chain }.
342  FLI->second, dl, RTys,
343  {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
344 
345  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
346  CurDAG->setNodeMemRefs(Res, {MemOp});
347 
348  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
349  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
350  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
351  CurDAG->RemoveDeadNode(IntN);
352  return true;
353  }
354  return false;
355 }
356 
357 /// Generate a machine instruction node for the new circlar buffer intrinsics.
358 /// The new versions use a CSx register instead of the K field.
360  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
361  return false;
362 
363  SDLoc DL(IntN);
364  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
366 
367  static std::map<unsigned,unsigned> LoadNPcMap = {
368  { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
369  { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
370  { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
371  { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
372  { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
373  { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
374  { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
375  { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
376  { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
377  { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
378  { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
379  { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
380  };
381  auto FLI = LoadNPcMap.find (IntNo);
382  if (FLI != LoadNPcMap.end()) {
383  EVT ValTy = MVT::i32;
384  if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
385  IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
386  ValTy = MVT::i64;
387  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
388  // Handle load.*_pci case which has 6 operands.
389  if (IntN->getNumOperands() == 6) {
390  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
391  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
392  // Operands: { Base, Increment, Modifier, Start, Chain }.
393  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
394  IntN->getOperand(0) };
395  } else
396  // Handle load.*_pcr case which has 5 operands.
397  // Operands: { Base, Modifier, Start, Chain }.
398  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
399  IntN->getOperand(0) };
400  MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
401  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
402  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
403  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
404  CurDAG->RemoveDeadNode(IntN);
405  return true;
406  }
407 
408  static std::map<unsigned,unsigned> StoreNPcMap = {
409  { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
410  { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
411  { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
412  { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
413  { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
414  { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
415  { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
416  { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
417  { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
418  { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
419  };
420  auto FSI = StoreNPcMap.find (IntNo);
421  if (FSI != StoreNPcMap.end()) {
422  EVT RTys[] = { MVT::i32, MVT::Other };
423  // Handle store.*_pci case which has 7 operands.
424  if (IntN->getNumOperands() == 7) {
425  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
426  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
427  // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
428  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
429  IntN->getOperand(6), IntN->getOperand(0) };
430  } else
431  // Handle store.*_pcr case which has 6 operands.
432  // Operands: { Base, Modifier, Value, Start, Chain }.
433  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
434  IntN->getOperand(5), IntN->getOperand(0) };
435  MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
436  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
437  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
438  CurDAG->RemoveDeadNode(IntN);
439  return true;
440  }
441 
442  return false;
443 }
444 
446  SDLoc dl(N);
447  LoadSDNode *LD = cast<LoadSDNode>(N);
448 
449  // Handle indexed loads.
451  if (AM != ISD::UNINDEXED) {
452  SelectIndexedLoad(LD, dl);
453  return;
454  }
455 
456  // Handle patterns using circ/brev load intrinsics.
457  if (tryLoadOfLoadIntrinsic(LD))
458  return;
459 
460  SelectCode(LD);
461 }
462 
464  SDValue Chain = ST->getChain();
465  SDValue Base = ST->getBasePtr();
466  SDValue Offset = ST->getOffset();
467  SDValue Value = ST->getValue();
468  // Get the constant value.
469  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
470  EVT StoredVT = ST->getMemoryVT();
471  EVT ValueVT = Value.getValueType();
472 
473  bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
474  unsigned Opcode = 0;
475 
476  assert(StoredVT.isSimple());
477  switch (StoredVT.getSimpleVT().SimpleTy) {
478  case MVT::i8:
479  Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
480  break;
481  case MVT::i16:
482  Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
483  break;
484  case MVT::i32:
485  case MVT::f32:
486  case MVT::v2i16:
487  case MVT::v4i8:
488  Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
489  break;
490  case MVT::i64:
491  case MVT::f64:
492  case MVT::v2i32:
493  case MVT::v4i16:
494  case MVT::v8i8:
495  Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
496  break;
497  case MVT::v64i8:
498  case MVT::v32i16:
499  case MVT::v16i32:
500  case MVT::v8i64:
501  case MVT::v128i8:
502  case MVT::v64i16:
503  case MVT::v32i32:
504  case MVT::v16i64:
505  if (isAlignedMemNode(ST)) {
506  if (ST->isNonTemporal())
507  Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
508  else
509  Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
510  } else {
511  Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
512  }
513  break;
514  default:
515  llvm_unreachable("Unexpected memory type in indexed store");
516  }
517 
518  if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
519  assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
520  Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
521  dl, MVT::i32, Value);
522  }
523 
524  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
525  MachineMemOperand *MemOp = ST->getMemOperand();
526 
527  // Next address Chain
528  SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
529  SDValue To[2];
530 
531  if (IsValidInc) {
532  // Build post increment store.
533  SDValue Ops[] = { Base, IncV, Value, Chain };
535  Ops);
536  CurDAG->setNodeMemRefs(S, {MemOp});
537  To[0] = SDValue(S, 0);
538  To[1] = SDValue(S, 1);
539  } else {
540  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
541  SDValue Ops[] = { Base, Zero, Value, Chain };
542  MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
543  CurDAG->setNodeMemRefs(S, {MemOp});
544  To[1] = SDValue(S, 0);
545  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
546  Base, IncV);
547  To[0] = SDValue(A, 0);
548  }
549 
550  ReplaceUses(From, To, 2);
551  CurDAG->RemoveDeadNode(ST);
552 }
553 
555  SDLoc dl(N);
556  StoreSDNode *ST = cast<StoreSDNode>(N);
557 
558  // Handle indexed stores.
560  if (AM != ISD::UNINDEXED) {
561  SelectIndexedStore(ST, dl);
562  return;
563  }
564 
565  SelectCode(ST);
566 }
567 
569  SDLoc dl(N);
570  SDValue Shl_0 = N->getOperand(0);
571  SDValue Shl_1 = N->getOperand(1);
572 
573  auto Default = [this,N] () -> void { SelectCode(N); };
574 
575  if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
576  return Default();
577 
578  // RHS is const.
579  int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
580 
581  if (Shl_0.getOpcode() == ISD::MUL) {
582  SDValue Mul_0 = Shl_0.getOperand(0); // Val
583  SDValue Mul_1 = Shl_0.getOperand(1); // Const
584  // RHS of mul is const.
585  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
586  int32_t ValConst = C->getSExtValue() << ShlConst;
587  if (isInt<9>(ValConst)) {
588  SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
589  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
590  MVT::i32, Mul_0, Val);
591  ReplaceNode(N, Result);
592  return;
593  }
594  }
595  return Default();
596  }
597 
598  if (Shl_0.getOpcode() == ISD::SUB) {
599  SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
600  SDValue Sub_1 = Shl_0.getOperand(1); // Val
601  if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
602  if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
603  return Default();
604  SDValue Shl2_0 = Sub_1.getOperand(0); // Val
605  SDValue Shl2_1 = Sub_1.getOperand(1); // Const
606  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
607  int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
608  if (isInt<9>(-ValConst)) {
609  SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
610  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
611  MVT::i32, Shl2_0, Val);
612  ReplaceNode(N, Result);
613  return;
614  }
615  }
616  }
617  }
618 
619  return Default();
620 }
621 
622 //
623 // Handling intrinsics for circular load and bitreverse load.
624 //
629  return;
630  }
631 
632  // Handle bit-reverse load intrinsics.
633  if (SelectBrevLdIntrinsic(N))
634  return;
635 
636  if (SelectNewCircIntrinsic(N))
637  return;
638 
639  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
640  if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
641  IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
642  IntNo == Intrinsic::hexagon_V6_vgathermh ||
643  IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
644  IntNo == Intrinsic::hexagon_V6_vgathermhw ||
645  IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
646  SelectV65Gather(N);
647  return;
648  }
649  if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
650  IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
651  IntNo == Intrinsic::hexagon_V6_vgathermhq ||
652  IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
653  IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
654  IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
656  return;
657  }
658 
659  SelectCode(N);
660 }
661 
663  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
664  unsigned Bits;
665  switch (IID) {
666  case Intrinsic::hexagon_S2_vsplatrb:
667  Bits = 8;
668  break;
669  case Intrinsic::hexagon_S2_vsplatrh:
670  Bits = 16;
671  break;
672  case Intrinsic::hexagon_V6_vaddcarry:
673  case Intrinsic::hexagon_V6_vaddcarry_128B:
674  case Intrinsic::hexagon_V6_vsubcarry:
675  case Intrinsic::hexagon_V6_vsubcarry_128B:
677  return;
678  default:
679  SelectCode(N);
680  return;
681  }
682 
683  SDValue V = N->getOperand(1);
684  SDValue U;
685  if (keepsLowBits(V, Bits, U)) {
686  SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
687  N->getOperand(0), U);
688  ReplaceNode(N, R.getNode());
689  SelectCode(R.getNode());
690  return;
691  }
692  SelectCode(N);
693 }
694 
695 //
696 // Map floating point constant values.
697 //
699  SDLoc dl(N);
701  APInt A = CN->getValueAPF().bitcastToAPInt();
702  if (N->getValueType(0) == MVT::f32) {
703  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
704  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
705  return;
706  }
707  if (N->getValueType(0) == MVT::f64) {
708  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
709  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
710  return;
711  }
712 
713  SelectCode(N);
714 }
715 
716 //
717 // Map boolean values.
718 //
720  if (N->getValueType(0) == MVT::i1) {
721  assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1));
722  unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
723  ? Hexagon::PS_true
724  : Hexagon::PS_false;
726  return;
727  }
728 
729  SelectCode(N);
730 }
731 
733  MachineFrameInfo &MFI = MF->getFrameInfo();
734  const HexagonFrameLowering *HFI = HST->getFrameLowering();
735  int FX = cast<FrameIndexSDNode>(N)->getIndex();
736  unsigned StkA = HFI->getStackAlignment();
737  unsigned MaxA = MFI.getMaxAlignment();
739  SDLoc DL(N);
740  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
741  SDNode *R = nullptr;
742 
743  // Use PS_fi when:
744  // - the object is fixed, or
745  // - there are no objects with higher-than-default alignment, or
746  // - there are no dynamically allocated objects.
747  // Otherwise, use PS_fia.
748  if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
749  R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
750  } else {
751  auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
752  unsigned AR = HMFI.getStackAlignBaseVReg();
753  SDValue CH = CurDAG->getEntryNode();
754  SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
755  R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
756  }
757 
758  ReplaceNode(N, R);
759 }
760 
762  unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
763  : Hexagon::A4_subp_c;
764  SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
765  { N->getOperand(0), N->getOperand(1),
766  N->getOperand(2) });
767  ReplaceNode(N, C);
768 }
769 
771  MVT ResTy = N->getValueType(0).getSimpleVT();
772  if (HST->isHVXVectorType(ResTy, true))
773  return SelectHvxVAlign(N);
774 
775  const SDLoc &dl(N);
776  unsigned VecLen = ResTy.getSizeInBits();
777  if (VecLen == 32) {
778  SDValue Ops[] = {
779  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
780  N->getOperand(0),
781  CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
782  N->getOperand(1),
783  CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
784  };
785  SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
786  MVT::i64, Ops);
787 
788  // Shift right by "(Addr & 0x3) * 8" bytes.
789  SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
790  SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
791  SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
792  M0, N->getOperand(2), M1);
793  SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
794  SDValue(R, 0), SDValue(C, 0));
795  SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
796  SDValue(S, 0));
797  ReplaceNode(N, E.getNode());
798  } else {
799  assert(VecLen == 64);
800  SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
801  N->getOperand(2));
802  SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
803  N->getOperand(0), N->getOperand(1),
804  SDValue(Pu,0));
805  ReplaceNode(N, VA);
806  }
807 }
808 
810  const SDLoc &dl(N);
811  SDValue A = N->getOperand(1);
812  int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
813  assert(isPowerOf2_32(-Mask));
814 
815  SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
816  SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
817  N->getOperand(0), M);
818  ReplaceNode(N, AA);
819 }
820 
821 // Handle these nodes here to avoid having to write patterns for all
822 // combinations of input/output types. In all cases, the resulting
823 // instruction is the same.
825  SDValue Op = N->getOperand(0);
826  MVT OpTy = Op.getValueType().getSimpleVT();
827  SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
828  CurDAG->getVTList(OpTy), {Op});
829  ReplaceNode(T, Op.getNode());
830 }
831 
833  MVT ResTy = N->getValueType(0).getSimpleVT();
834  SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
835  N->getOperand(0));
836  ReplaceNode(N, T);
837 }
838 
840  const SDLoc &dl(N);
841  MVT ResTy = N->getValueType(0).getSimpleVT();
842  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
843  SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
844  N->getOperand(0), Zero);
845  ReplaceNode(N, T);
846 }
847 
849  const SDLoc &dl(N);
850  MVT ResTy = N->getValueType(0).getSimpleVT();
851  // The argument to V2Q should be a single vector.
852  MVT OpTy = N->getOperand(0).getValueType().getSimpleVT(); (void)OpTy;
853  assert(HST->getVectorLength() * 8 == OpTy.getSizeInBits());
854 
856  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
857  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
858  N->getOperand(0), SDValue(R,0));
859  ReplaceNode(N, T);
860 }
861 
863  const SDLoc &dl(N);
864  MVT ResTy = N->getValueType(0).getSimpleVT();
865  // The result of V2Q should be a single vector.
866  assert(HST->getVectorLength() * 8 == ResTy.getSizeInBits());
867 
869  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
870  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
871  N->getOperand(0), SDValue(R,0));
872  ReplaceNode(N, T);
873 }
874 
876  if (N->isMachineOpcode())
877  return N->setNodeId(-1); // Already selected.
878 
879  switch (N->getOpcode()) {
880  case ISD::Constant: return SelectConstant(N);
881  case ISD::ConstantFP: return SelectConstantFP(N);
882  case ISD::FrameIndex: return SelectFrameIndex(N);
883  case ISD::SHL: return SelectSHL(N);
884  case ISD::LOAD: return SelectLoad(N);
885  case ISD::STORE: return SelectStore(N);
888 
889  case HexagonISD::ADDC:
890  case HexagonISD::SUBC: return SelectAddSubCarry(N);
891  case HexagonISD::VALIGN: return SelectVAlign(N);
893  case HexagonISD::TYPECAST: return SelectTypecast(N);
894  case HexagonISD::P2D: return SelectP2D(N);
895  case HexagonISD::D2P: return SelectD2P(N);
896  case HexagonISD::Q2V: return SelectQ2V(N);
897  case HexagonISD::V2Q: return SelectV2Q(N);
898  }
899 
900  if (HST->useHVXOps()) {
901  switch (N->getOpcode()) {
902  case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N);
903  case HexagonISD::VROR: return SelectHvxRor(N);
904  }
905  }
906 
907  SelectCode(N);
908 }
909 
911 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
912  std::vector<SDValue> &OutOps) {
913  SDValue Inp = Op, Res;
914 
915  switch (ConstraintID) {
916  default:
917  return true;
919  case InlineAsm::Constraint_o: // Offsetable.
920  case InlineAsm::Constraint_v: // Not offsetable.
921  case InlineAsm::Constraint_m: // Memory.
922  if (SelectAddrFI(Inp, Res))
923  OutOps.push_back(Res);
924  else
925  OutOps.push_back(Inp);
926  break;
927  }
928 
929  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
930  return false;
931 }
932 
933 
934 static bool isMemOPCandidate(SDNode *I, SDNode *U) {
935  // I is an operand of U. Check if U is an arithmetic (binary) operation
936  // usable in a memop, where the other operand is a loaded value, and the
937  // result of U is stored in the same location.
938 
939  if (!U->hasOneUse())
940  return false;
941  unsigned Opc = U->getOpcode();
942  switch (Opc) {
943  case ISD::ADD:
944  case ISD::SUB:
945  case ISD::AND:
946  case ISD::OR:
947  break;
948  default:
949  return false;
950  }
951 
952  SDValue S0 = U->getOperand(0);
953  SDValue S1 = U->getOperand(1);
954  SDValue SY = (S0.getNode() == I) ? S1 : S0;
955 
956  SDNode *UUse = *U->use_begin();
957  if (UUse->getNumValues() != 1)
958  return false;
959 
960  // Check if one of the inputs to U is a load instruction and the output
961  // is used by a store instruction. If so and they also have the same
962  // base pointer, then don't preoprocess this node sequence as it
963  // can be matched to a memop.
964  SDNode *SYNode = SY.getNode();
965  if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
966  SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
967  SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
968  if (LDBasePtr == STBasePtr)
969  return true;
970  }
971  return false;
972 }
973 
974 
975 // Transform: (or (select c x 0) z) -> (select c (or x z) z)
976 // (or (select c 0 y) z) -> (select c z (or y z))
977 void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
978  SelectionDAG &DAG = *CurDAG;
979 
980  for (auto I : Nodes) {
981  if (I->getOpcode() != ISD::OR)
982  continue;
983 
984  auto IsZero = [] (const SDValue &V) -> bool {
985  if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
986  return SC->isNullValue();
987  return false;
988  };
989  auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
990  if (Op.getOpcode() != ISD::SELECT)
991  return false;
992  return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
993  };
994 
995  SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
996  EVT VT = I->getValueType(0);
997  bool SelN0 = IsSelect0(N0);
998  SDValue SOp = SelN0 ? N0 : N1;
999  SDValue VOp = SelN0 ? N1 : N0;
1000 
1001  if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
1002  SDValue SC = SOp.getOperand(0);
1003  SDValue SX = SOp.getOperand(1);
1004  SDValue SY = SOp.getOperand(2);
1005  SDLoc DLS = SOp;
1006  if (IsZero(SY)) {
1007  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1008  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1009  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1010  } else if (IsZero(SX)) {
1011  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1012  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1013  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1014  }
1015  }
1016  }
1017 }
1018 
1019 // Transform: (store ch val (add x (add (shl y c) e)))
1020 // to: (store ch val (add x (shl (add y d) c))),
1021 // where e = (shl d c) for some integer d.
1022 // The purpose of this is to enable generation of loads/stores with
1023 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1024 // value c must be 0, 1 or 2.
1025 void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1026  SelectionDAG &DAG = *CurDAG;
1027 
1028  for (auto I : Nodes) {
1029  if (I->getOpcode() != ISD::STORE)
1030  continue;
1031 
1032  // I matched: (store ch val Off)
1033  SDValue Off = I->getOperand(2);
1034  // Off needs to match: (add x (add (shl y c) (shl d c))))
1035  if (Off.getOpcode() != ISD::ADD)
1036  continue;
1037  // Off matched: (add x T0)
1038  SDValue T0 = Off.getOperand(1);
1039  // T0 needs to match: (add T1 T2):
1040  if (T0.getOpcode() != ISD::ADD)
1041  continue;
1042  // T0 matched: (add T1 T2)
1043  SDValue T1 = T0.getOperand(0);
1044  SDValue T2 = T0.getOperand(1);
1045  // T1 needs to match: (shl y c)
1046  if (T1.getOpcode() != ISD::SHL)
1047  continue;
1048  SDValue C = T1.getOperand(1);
1050  if (CN == nullptr)
1051  continue;
1052  unsigned CV = CN->getZExtValue();
1053  if (CV > 2)
1054  continue;
1055  // T2 needs to match e, where e = (shl d c) for some d.
1057  if (EN == nullptr)
1058  continue;
1059  unsigned EV = EN->getZExtValue();
1060  if (EV % (1 << CV) != 0)
1061  continue;
1062  unsigned DV = EV / (1 << CV);
1063 
1064  // Replace T0 with: (shl (add y d) c)
1065  SDLoc DL = SDLoc(I);
1066  EVT VT = T0.getValueType();
1067  SDValue D = DAG.getConstant(DV, DL, VT);
1068  // NewAdd = (add y d)
1069  SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1070  // NewShl = (shl NewAdd c)
1071  SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1072  ReplaceNode(T0.getNode(), NewShl.getNode());
1073  }
1074 }
1075 
1076 // Transform: (load ch (add x (and (srl y c) Mask)))
1077 // to: (load ch (add x (shl (srl y d) d-c)))
1078 // where
1079 // Mask = 00..0 111..1 0.0
1080 // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1081 // | +-------- 1s
1082 // +-------------- at most c 0s
1083 // Motivating example:
1084 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1085 // to (add x (and (srl y 3) 1FFFFFFC))
1086 // which results in a constant-extended and(##...,lsr). This transformation
1087 // undoes this simplification for cases where the shl can be folded into
1088 // an addressing mode.
1089 void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1090  SelectionDAG &DAG = *CurDAG;
1091 
1092  for (SDNode *N : Nodes) {
1093  unsigned Opc = N->getOpcode();
1094  if (Opc != ISD::LOAD && Opc != ISD::STORE)
1095  continue;
1096  SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1097  // Addr must match: (add x T0)
1098  if (Addr.getOpcode() != ISD::ADD)
1099  continue;
1100  SDValue T0 = Addr.getOperand(1);
1101  // T0 must match: (and T1 Mask)
1102  if (T0.getOpcode() != ISD::AND)
1103  continue;
1104 
1105  // We have an AND.
1106  //
1107  // Check the first operand. It must be: (srl y c).
1108  SDValue S = T0.getOperand(0);
1109  if (S.getOpcode() != ISD::SRL)
1110  continue;
1112  if (SN == nullptr)
1113  continue;
1114  if (SN->getAPIntValue().getBitWidth() != 32)
1115  continue;
1116  uint32_t CV = SN->getZExtValue();
1117 
1118  // Check the second operand: the supposed mask.
1120  if (MN == nullptr)
1121  continue;
1122  if (MN->getAPIntValue().getBitWidth() != 32)
1123  continue;
1124  uint32_t Mask = MN->getZExtValue();
1125  // Examine the mask.
1126  uint32_t TZ = countTrailingZeros(Mask);
1127  uint32_t M1 = countTrailingOnes(Mask >> TZ);
1128  uint32_t LZ = countLeadingZeros(Mask);
1129  // Trailing zeros + middle ones + leading zeros must equal the width.
1130  if (TZ + M1 + LZ != 32)
1131  continue;
1132  // The number of trailing zeros will be encoded in the addressing mode.
1133  if (TZ > 2)
1134  continue;
1135  // The number of leading zeros must be at most c.
1136  if (LZ > CV)
1137  continue;
1138 
1139  // All looks good.
1140  SDValue Y = S.getOperand(0);
1141  EVT VT = Addr.getValueType();
1142  SDLoc dl(S);
1143  // TZ = D-C, so D = TZ+C.
1144  SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1145  SDValue DC = DAG.getConstant(TZ, dl, VT);
1146  SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1147  SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1148  ReplaceNode(T0.getNode(), NewShl.getNode());
1149  }
1150 }
1151 
1152 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1153 // (op ... 1 ...))
1154 void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1155  SelectionDAG &DAG = *CurDAG;
1156 
1157  for (SDNode *N : Nodes) {
1158  unsigned Opc = N->getOpcode();
1159  if (Opc != ISD::ZERO_EXTEND)
1160  continue;
1161  SDValue OpI1 = N->getOperand(0);
1162  EVT OpVT = OpI1.getValueType();
1163  if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1164  continue;
1165  for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
1166  SDNode *U = *I;
1167  if (U->getNumValues() != 1)
1168  continue;
1169  EVT UVT = U->getValueType(0);
1170  if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1171  continue;
1172  if (isMemOPCandidate(N, U))
1173  continue;
1174 
1175  // Potentially simplifiable operation.
1176  unsigned I1N = I.getOperandNo();
1178  for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1179  Ops[i] = U->getOperand(i);
1180  EVT BVT = Ops[I1N].getValueType();
1181 
1182  SDLoc dl(U);
1183  SDValue C0 = DAG.getConstant(0, dl, BVT);
1184  SDValue C1 = DAG.getConstant(1, dl, BVT);
1185  SDValue If0, If1;
1186 
1187  if (isa<MachineSDNode>(U)) {
1188  unsigned UseOpc = U->getMachineOpcode();
1189  Ops[I1N] = C0;
1190  If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1191  Ops[I1N] = C1;
1192  If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1193  } else {
1194  unsigned UseOpc = U->getOpcode();
1195  Ops[I1N] = C0;
1196  If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1197  Ops[I1N] = C1;
1198  If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1199  }
1200  SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
1201  DAG.ReplaceAllUsesWith(U, Sel.getNode());
1202  }
1203  }
1204 }
1205 
1207  // Repack all nodes before calling each preprocessing function,
1208  // because each of them can modify the set of nodes.
1209  auto getNodes = [this] () -> std::vector<SDNode*> {
1210  std::vector<SDNode*> T;
1211  T.reserve(CurDAG->allnodes_size());
1212  for (SDNode &N : CurDAG->allnodes())
1213  T.push_back(&N);
1214  return T;
1215  };
1216 
1217  // Transform: (or (select c x 0) z) -> (select c (or x z) z)
1218  // (or (select c 0 y) z) -> (select c z (or y z))
1219  ppSimplifyOrSelect0(getNodes());
1220 
1221  // Transform: (store ch val (add x (add (shl y c) e)))
1222  // to: (store ch val (add x (shl (add y d) c))),
1223  // where e = (shl d c) for some integer d.
1224  // The purpose of this is to enable generation of loads/stores with
1225  // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1226  // value c must be 0, 1 or 2.
1227  ppAddrReorderAddShl(getNodes());
1228 
1229  // Transform: (load ch (add x (and (srl y c) Mask)))
1230  // to: (load ch (add x (shl (srl y d) d-c)))
1231  // where
1232  // Mask = 00..0 111..1 0.0
1233  // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1234  // | +-------- 1s
1235  // +-------------- at most c 0s
1236  // Motivating example:
1237  // DAG combiner optimizes (add x (shl (srl y 5) 2))
1238  // to (add x (and (srl y 3) 1FFFFFFC))
1239  // which results in a constant-extended and(##...,lsr). This transformation
1240  // undoes this simplification for cases where the shl can be folded into
1241  // an addressing mode.
1242  ppAddrRewriteAndSrl(getNodes());
1243 
1244  // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1245  // (op ... 1 ...))
1246  ppHoistZextI1(getNodes());
1247 
1248  DEBUG_WITH_TYPE("isel", {
1249  dbgs() << "Preprocessed (Hexagon) selection DAG:";
1250  CurDAG->dump();
1251  });
1252 
1254  rebalanceAddressTrees();
1255 
1256  DEBUG_WITH_TYPE("isel", {
1257  dbgs() << "Address tree balanced selection DAG:";
1258  CurDAG->dump();
1259  });
1260  }
1261 }
1262 
1264  auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
1265  auto &HFI = *HST.getFrameLowering();
1266  if (!HFI.needsAligna(*MF))
1267  return;
1268 
1269  MachineFrameInfo &MFI = MF->getFrameInfo();
1270  MachineBasicBlock *EntryBB = &MF->front();
1271  unsigned AR = FuncInfo->CreateReg(MVT::i32);
1272  unsigned MaxA = MFI.getMaxAlignment();
1273  BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
1274  .addImm(MaxA);
1275  MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
1276 }
1277 
1278 // Match a frame index that can be used in an addressing mode.
1280  if (N.getOpcode() != ISD::FrameIndex)
1281  return false;
1282  auto &HFI = *HST->getFrameLowering();
1283  MachineFrameInfo &MFI = MF->getFrameInfo();
1284  int FX = cast<FrameIndexSDNode>(N)->getIndex();
1285  if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1286  return false;
1288  return true;
1289 }
1290 
1292  return SelectGlobalAddress(N, R, false, 0);
1293 }
1294 
1296  return SelectGlobalAddress(N, R, true, 0);
1297 }
1298 
1300  return SelectAnyImmediate(N, R, 0);
1301 }
1302 
1304  return SelectAnyImmediate(N, R, 0);
1305 }
1307  return SelectAnyImmediate(N, R, 1);
1308 }
1310  return SelectAnyImmediate(N, R, 2);
1311 }
1313  return SelectAnyImmediate(N, R, 3);
1314 }
1315 
1317  EVT T = N.getValueType();
1318  if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1319  return false;
1320  R = N;
1321  return true;
1322 }
1323 
1325  uint32_t LogAlign) {
1326  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1327  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1328  };
1329 
1330  switch (N.getOpcode()) {
1331  case ISD::Constant: {
1332  if (N.getValueType() != MVT::i32)
1333  return false;
1334  int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1335  if (!IsAligned(V))
1336  return false;
1337  R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1338  return true;
1339  }
1340  case HexagonISD::JT:
1341  case HexagonISD::CP:
1342  // These are assumed to always be aligned at least 8-byte boundary.
1343  if (LogAlign > 3)
1344  return false;
1345  R = N.getOperand(0);
1346  return true;
1347  case ISD::ExternalSymbol:
1348  // Symbols may be aligned at any boundary.
1349  if (LogAlign > 0)
1350  return false;
1351  R = N;
1352  return true;
1353  case ISD::BlockAddress:
1354  // Block address is always aligned at least 4-byte boundary.
1355  if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset()))
1356  return false;
1357  R = N;
1358  return true;
1359  }
1360 
1361  if (SelectGlobalAddress(N, R, false, LogAlign) ||
1362  SelectGlobalAddress(N, R, true, LogAlign))
1363  return true;
1364 
1365  return false;
1366 }
1367 
1369  bool UseGP, uint32_t LogAlign) {
1370  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1371  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1372  };
1373 
1374  switch (N.getOpcode()) {
1375  case ISD::ADD: {
1376  SDValue N0 = N.getOperand(0);
1377  SDValue N1 = N.getOperand(1);
1378  unsigned GAOpc = N0.getOpcode();
1379  if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1380  return false;
1381  if (!UseGP && GAOpc != HexagonISD::CONST32)
1382  return false;
1383  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1384  SDValue Addr = N0.getOperand(0);
1385  // For the purpose of alignment, sextvalue and zextvalue are the same.
1386  if (!IsAligned(Const->getZExtValue()))
1387  return false;
1388  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1389  if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1390  uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1391  R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1392  N.getValueType(), NewOff);
1393  return true;
1394  }
1395  }
1396  }
1397  break;
1398  }
1399  case HexagonISD::CP:
1400  case HexagonISD::JT:
1401  case HexagonISD::CONST32:
1402  // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1403  // want in the instruction.
1404  if (!UseGP)
1405  R = N.getOperand(0);
1406  return !UseGP;
1408  if (UseGP)
1409  R = N.getOperand(0);
1410  return UseGP;
1411  default:
1412  return false;
1413  }
1414 
1415  return false;
1416 }
1417 
1419  // This (complex pattern) function is meant to detect a sign-extension
1420  // i32->i64 on a per-operand basis. This would allow writing single
1421  // patterns that would cover a number of combinations of different ways
1422  // a sign-extensions could be written. For example:
1423  // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1424  // could match either one of these:
1425  // (mul (sext x) (sext_inreg y))
1426  // (mul (sext-load *p) (sext_inreg y))
1427  // (mul (sext_inreg x) (sext y))
1428  // etc.
1429  //
1430  // The returned value will have type i64 and its low word will
1431  // contain the value being extended. The high bits are not specified.
1432  // The returned type is i64 because the original type of N was i64,
1433  // but the users of this function should only use the low-word of the
1434  // result, e.g.
1435  // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1436 
1437  if (N.getValueType() != MVT::i64)
1438  return false;
1439  unsigned Opc = N.getOpcode();
1440  switch (Opc) {
1441  case ISD::SIGN_EXTEND:
1442  case ISD::SIGN_EXTEND_INREG: {
1443  // sext_inreg has the source type as a separate operand.
1444  EVT T = Opc == ISD::SIGN_EXTEND
1445  ? N.getOperand(0).getValueType()
1446  : cast<VTSDNode>(N.getOperand(1))->getVT();
1447  unsigned SW = T.getSizeInBits();
1448  if (SW == 32)
1449  R = N.getOperand(0);
1450  else if (SW < 32)
1451  R = N;
1452  else
1453  return false;
1454  break;
1455  }
1456  case ISD::LOAD: {
1457  LoadSDNode *L = cast<LoadSDNode>(N);
1458  if (L->getExtensionType() != ISD::SEXTLOAD)
1459  return false;
1460  // All extending loads extend to i32, so even if the value in
1461  // memory is shorter than 32 bits, it will be i32 after the load.
1462  if (L->getMemoryVT().getSizeInBits() > 32)
1463  return false;
1464  R = N;
1465  break;
1466  }
1467  case ISD::SRA: {
1468  auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1469  if (!S || S->getZExtValue() != 32)
1470  return false;
1471  R = N;
1472  break;
1473  }
1474  default:
1475  return false;
1476  }
1477  EVT RT = R.getValueType();
1478  if (RT == MVT::i64)
1479  return true;
1480  assert(RT == MVT::i32);
1481  // This is only to produce a value of type i64. Do not rely on the
1482  // high bits produced by this.
1483  const SDLoc &dl(N);
1484  SDValue Ops[] = {
1485  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1486  R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1487  R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1488  };
1489  SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1490  MVT::i64, Ops);
1491  R = SDValue(T, 0);
1492  return true;
1493 }
1494 
1495 bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1496  SDValue &Src) {
1497  unsigned Opc = Val.getOpcode();
1498  switch (Opc) {
1499  case ISD::SIGN_EXTEND:
1500  case ISD::ZERO_EXTEND:
1501  case ISD::ANY_EXTEND: {
1502  const SDValue &Op0 = Val.getOperand(0);
1503  EVT T = Op0.getValueType();
1504  if (T.isInteger() && T.getSizeInBits() == NumBits) {
1505  Src = Op0;
1506  return true;
1507  }
1508  break;
1509  }
1511  case ISD::AssertSext:
1512  case ISD::AssertZext:
1513  if (Val.getOperand(0).getValueType().isInteger()) {
1514  VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1515  if (T->getVT().getSizeInBits() == NumBits) {
1516  Src = Val.getOperand(0);
1517  return true;
1518  }
1519  }
1520  break;
1521  case ISD::AND: {
1522  // Check if this is an AND with NumBits of lower bits set to 1.
1523  uint64_t Mask = (1 << NumBits) - 1;
1524  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1525  if (C->getZExtValue() == Mask) {
1526  Src = Val.getOperand(1);
1527  return true;
1528  }
1529  }
1530  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1531  if (C->getZExtValue() == Mask) {
1532  Src = Val.getOperand(0);
1533  return true;
1534  }
1535  }
1536  break;
1537  }
1538  case ISD::OR:
1539  case ISD::XOR: {
1540  // OR/XOR with the lower NumBits bits set to 0.
1541  uint64_t Mask = (1 << NumBits) - 1;
1542  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1543  if ((C->getZExtValue() & Mask) == 0) {
1544  Src = Val.getOperand(1);
1545  return true;
1546  }
1547  }
1548  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1549  if ((C->getZExtValue() & Mask) == 0) {
1550  Src = Val.getOperand(0);
1551  return true;
1552  }
1553  }
1554  break;
1555  }
1556  default:
1557  break;
1558  }
1559  return false;
1560 }
1561 
1562 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1563  return N->getAlignment() >= N->getMemoryVT().getStoreSize();
1564 }
1565 
1566 bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1567  unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1568  switch (N->getMemoryVT().getStoreSize()) {
1569  case 1:
1570  return StackSize <= 56; // 1*2^6 - 8
1571  case 2:
1572  return StackSize <= 120; // 2*2^6 - 8
1573  case 4:
1574  return StackSize <= 248; // 4*2^6 - 8
1575  default:
1576  return false;
1577  }
1578 }
1579 
1580 // Return true when the given node fits in a positive half word.
1581 bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1582  if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1583  int64_t V = CN->getSExtValue();
1584  return V > 0 && isInt<16>(V);
1585  }
1586  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1587  const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1588  return VN->getVT().getSizeInBits() <= 16;
1589  }
1590  return false;
1591 }
1592 
1593 bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1594  return !CheckSingleUse || N->hasOneUse();
1595 }
1596 
1597 ////////////////////////////////////////////////////////////////////////////////
1598 // Rebalancing of address calculation trees
1599 
1600 static bool isOpcodeHandled(const SDNode *N) {
1601  switch (N->getOpcode()) {
1602  case ISD::ADD:
1603  case ISD::MUL:
1604  return true;
1605  case ISD::SHL:
1606  // We only handle constant shifts because these can be easily flattened
1607  // into multiplications by 2^Op1.
1608  return isa<ConstantSDNode>(N->getOperand(1).getNode());
1609  default:
1610  return false;
1611  }
1612 }
1613 
1614 /// Return the weight of an SDNode
1615 int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1616  if (!isOpcodeHandled(N))
1617  return 1;
1618  assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1619  assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1620  assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1621  return RootWeights[N];
1622 }
1623 
1624 int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1625  if (!isOpcodeHandled(N))
1626  return 0;
1627  assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1628  "Cannot query height of unvisited/RAUW'd node!");
1629  return RootHeights[N];
1630 }
1631 
1632 namespace {
1633 struct WeightedLeaf {
1634  SDValue Value;
1635  int Weight;
1636  int InsertionOrder;
1637 
1638  WeightedLeaf() : Value(SDValue()) { }
1639 
1640  WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1641  Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1642  assert(Weight >= 0 && "Weight must be >= 0");
1643  }
1644 
1645  static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1646  assert(A.Value.getNode() && B.Value.getNode());
1647  return A.Weight == B.Weight ?
1648  (A.InsertionOrder > B.InsertionOrder) :
1649  (A.Weight > B.Weight);
1650  }
1651 };
1652 
1653 /// A specialized priority queue for WeigthedLeaves. It automatically folds
1654 /// constants and allows removal of non-top elements while maintaining the
1655 /// priority order.
1656 class LeafPrioQueue {
1658  bool HaveConst;
1659  WeightedLeaf ConstElt;
1660  unsigned Opcode;
1661 
1662 public:
1663  bool empty() {
1664  return (!HaveConst && Q.empty());
1665  }
1666 
1667  size_t size() {
1668  return Q.size() + HaveConst;
1669  }
1670 
1671  bool hasConst() {
1672  return HaveConst;
1673  }
1674 
1675  const WeightedLeaf &top() {
1676  if (HaveConst)
1677  return ConstElt;
1678  return Q.front();
1679  }
1680 
1681  WeightedLeaf pop() {
1682  if (HaveConst) {
1683  HaveConst = false;
1684  return ConstElt;
1685  }
1686  std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1687  return Q.pop_back_val();
1688  }
1689 
1690  void push(WeightedLeaf L, bool SeparateConst=true) {
1691  if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1692  if (Opcode == ISD::MUL &&
1693  cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1694  return;
1695  if (Opcode == ISD::ADD &&
1696  cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1697  return;
1698 
1699  HaveConst = true;
1700  ConstElt = L;
1701  } else {
1702  Q.push_back(L);
1703  std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1704  }
1705  }
1706 
1707  /// Push L to the bottom of the queue regardless of its weight. If L is
1708  /// constant, it will not be folded with other constants in the queue.
1709  void pushToBottom(WeightedLeaf L) {
1710  L.Weight = 1000;
1711  push(L, false);
1712  }
1713 
1714  /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1715  /// lowest weight and remove it from the queue.
1716  WeightedLeaf findSHL(uint64_t MaxAmount);
1717 
1718  WeightedLeaf findMULbyConst();
1719 
1720  LeafPrioQueue(unsigned Opcode) :
1721  HaveConst(false), Opcode(Opcode) { }
1722 };
1723 } // end anonymous namespace
1724 
1725 WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1726  int ResultPos;
1727  WeightedLeaf Result;
1728 
1729  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1730  const WeightedLeaf &L = Q[Pos];
1731  const SDValue &Val = L.Value;
1732  if (Val.getOpcode() != ISD::SHL ||
1733  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1734  Val.getConstantOperandVal(1) > MaxAmount)
1735  continue;
1736  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1737  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1738  {
1739  Result = L;
1740  ResultPos = Pos;
1741  }
1742  }
1743 
1744  if (Result.Value.getNode()) {
1745  Q.erase(&Q[ResultPos]);
1746  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1747  }
1748 
1749  return Result;
1750 }
1751 
1752 WeightedLeaf LeafPrioQueue::findMULbyConst() {
1753  int ResultPos;
1754  WeightedLeaf Result;
1755 
1756  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1757  const WeightedLeaf &L = Q[Pos];
1758  const SDValue &Val = L.Value;
1759  if (Val.getOpcode() != ISD::MUL ||
1760  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1761  Val.getConstantOperandVal(1) > 127)
1762  continue;
1763  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1764  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1765  {
1766  Result = L;
1767  ResultPos = Pos;
1768  }
1769  }
1770 
1771  if (Result.Value.getNode()) {
1772  Q.erase(&Q[ResultPos]);
1773  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1774  }
1775 
1776  return Result;
1777 }
1778 
1779 SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1780  uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1781  return CurDAG->getConstant(MulFactor, SDLoc(N),
1782  N->getOperand(1).getValueType());
1783 }
1784 
1785 /// @returns the value x for which 2^x is a factor of Val
1786 static unsigned getPowerOf2Factor(SDValue Val) {
1787  if (Val.getOpcode() == ISD::MUL) {
1788  unsigned MaxFactor = 0;
1789  for (int i = 0; i < 2; ++i) {
1791  if (!C)
1792  continue;
1793  const APInt &CInt = C->getAPIntValue();
1794  if (CInt.getBoolValue())
1795  MaxFactor = CInt.countTrailingZeros();
1796  }
1797  return MaxFactor;
1798  }
1799  if (Val.getOpcode() == ISD::SHL) {
1800  if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1801  return 0;
1802  return (unsigned) Val.getConstantOperandVal(1);
1803  }
1804 
1805  return 0;
1806 }
1807 
1808 /// @returns true if V>>Amount will eliminate V's operation on its child
1809 static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1810  if (V.getOpcode() == ISD::MUL) {
1811  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1812  for (int i = 0; i < 2; ++i)
1813  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1814  V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1815  uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1816  return (NewConst == 1);
1817  }
1818  } else if (V.getOpcode() == ISD::SHL) {
1819  return (Amount == V.getConstantOperandVal(1));
1820  }
1821 
1822  return false;
1823 }
1824 
1825 SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1826  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1827  if (V.getOpcode() == ISD::MUL) {
1828  for (int i=0; i < 2; ++i) {
1829  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1830  V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
1831  uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
1832  if (NewConst == 1)
1833  return Ops[!i];
1834  Ops[i] = CurDAG->getConstant(NewConst,
1835  SDLoc(V), V.getValueType());
1836  break;
1837  }
1838  }
1839  } else if (V.getOpcode() == ISD::SHL) {
1840  uint64_t ShiftAmount = V.getConstantOperandVal(1);
1841  if (ShiftAmount == Power)
1842  return Ops[0];
1843  Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
1844  SDLoc(V), V.getValueType());
1845  }
1846 
1847  return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
1848 }
1849 
1850 static bool isTargetConstant(const SDValue &V) {
1851  return V.getOpcode() == HexagonISD::CONST32 ||
1853 }
1854 
1855 unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
1856  if (GAUsesInFunction.count(V))
1857  return GAUsesInFunction[V];
1858 
1859  unsigned Result = 0;
1860  const Function &CurF = CurDAG->getMachineFunction().getFunction();
1861  for (const User *U : V->users()) {
1862  if (isa<Instruction>(U) &&
1863  cast<Instruction>(U)->getParent()->getParent() == &CurF)
1864  ++Result;
1865  }
1866 
1867  GAUsesInFunction[V] = Result;
1868 
1869  return Result;
1870 }
1871 
1872 /// Note - After calling this, N may be dead. It may have been replaced by a
1873 /// new node, so always use the returned value in place of N.
1874 ///
1875 /// @returns The SDValue taking the place of N (which could be N if it is
1876 /// unchanged)
1877 SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
1878  assert(RootWeights.count(N) && "Cannot balance non-root node.");
1879  assert(RootWeights[N] != -2 && "This node was RAUW'd!");
1880  assert(!TopLevel || N->getOpcode() == ISD::ADD);
1881 
1882  // Return early if this node was already visited
1883  if (RootWeights[N] != -1)
1884  return SDValue(N, 0);
1885 
1886  assert(isOpcodeHandled(N));
1887 
1888  SDValue Op0 = N->getOperand(0);
1889  SDValue Op1 = N->getOperand(1);
1890 
1891  // Return early if the operands will remain unchanged or are all roots
1892  if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
1893  (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
1894  SDNode *Op0N = Op0.getNode();
1895  int Weight;
1896  if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
1897  Weight = getWeight(balanceSubTree(Op0N).getNode());
1898  // Weight = calculateWeight(Op0N);
1899  } else
1900  Weight = getWeight(Op0N);
1901 
1902  SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
1903  if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
1904  Weight += getWeight(balanceSubTree(Op1N).getNode());
1905  // Weight += calculateWeight(Op1N);
1906  } else
1907  Weight += getWeight(Op1N);
1908 
1909  RootWeights[N] = Weight;
1910  RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
1911  getHeight(N->getOperand(1).getNode())) + 1;
1912 
1913  LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
1914  << " Height=" << RootHeights[N] << "): ");
1915  LLVM_DEBUG(N->dump(CurDAG));
1916 
1917  return SDValue(N, 0);
1918  }
1919 
1920  LLVM_DEBUG(dbgs() << "** Balancing root node: ");
1921  LLVM_DEBUG(N->dump(CurDAG));
1922 
1923  unsigned NOpcode = N->getOpcode();
1924 
1925  LeafPrioQueue Leaves(NOpcode);
1926  SmallVector<SDValue, 4> Worklist;
1927  Worklist.push_back(SDValue(N, 0));
1928 
1929  // SHL nodes will be converted to MUL nodes
1930  if (NOpcode == ISD::SHL)
1931  NOpcode = ISD::MUL;
1932 
1933  bool CanFactorize = false;
1934  WeightedLeaf Mul1, Mul2;
1935  unsigned MaxPowerOf2 = 0;
1936  WeightedLeaf GA;
1937 
1938  // Do not try to factor out a shift if there is already a shift at the tip of
1939  // the tree.
1940  bool HaveTopLevelShift = false;
1941  if (TopLevel &&
1942  ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
1943  Op0.getConstantOperandVal(1) < 4) ||
1944  (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
1945  Op1.getConstantOperandVal(1) < 4)))
1946  HaveTopLevelShift = true;
1947 
1948  // Flatten the subtree into an ordered list of leaves; at the same time
1949  // determine whether the tree is already balanced.
1950  int InsertionOrder = 0;
1951  SmallDenseMap<SDValue, int> NodeHeights;
1952  bool Imbalanced = false;
1953  int CurrentWeight = 0;
1954  while (!Worklist.empty()) {
1955  SDValue Child = Worklist.pop_back_val();
1956 
1957  if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
1958  // CASE 1: Child is a root note
1959 
1960  int Weight = RootWeights[Child.getNode()];
1961  if (Weight == -1) {
1962  Child = balanceSubTree(Child.getNode());
1963  // calculateWeight(Child.getNode());
1964  Weight = getWeight(Child.getNode());
1965  } else if (Weight == -2) {
1966  // Whoops, this node was RAUWd by one of the balanceSubTree calls we
1967  // made. Our worklist isn't up to date anymore.
1968  // Restart the whole process.
1969  LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
1970  return balanceSubTree(N, TopLevel);
1971  }
1972 
1973  NodeHeights[Child] = 1;
1974  CurrentWeight += Weight;
1975 
1976  unsigned PowerOf2;
1977  if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
1978  (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
1979  Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
1980  // Try to identify two factorizable MUL/SHL children greedily. Leave
1981  // them out of the priority queue for now so we can deal with them
1982  // after.
1983  if (!Mul1.Value.getNode()) {
1984  Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
1985  MaxPowerOf2 = PowerOf2;
1986  } else {
1987  Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
1988  MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
1989 
1990  // Our addressing modes can only shift by a maximum of 3
1991  if (MaxPowerOf2 > 3)
1992  MaxPowerOf2 = 3;
1993 
1994  CanFactorize = true;
1995  }
1996  } else
1997  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
1998  } else if (!isOpcodeHandled(Child.getNode())) {
1999  // CASE 2: Child is an unhandled kind of node (e.g. constant)
2000  int Weight = getWeight(Child.getNode());
2001 
2002  NodeHeights[Child] = getHeight(Child.getNode());
2003  CurrentWeight += Weight;
2004 
2005  if (isTargetConstant(Child) && !GA.Value.getNode())
2006  GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2007  else
2008  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2009  } else {
2010  // CASE 3: Child is a subtree of same opcode
2011  // Visit children first, then flatten.
2012  unsigned ChildOpcode = Child.getOpcode();
2013  assert(ChildOpcode == NOpcode ||
2014  (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2015 
2016  // Convert SHL to MUL
2017  SDValue Op1;
2018  if (ChildOpcode == ISD::SHL)
2019  Op1 = getMultiplierForSHL(Child.getNode());
2020  else
2021  Op1 = Child->getOperand(1);
2022 
2023  if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2024  assert(!NodeHeights.count(Child) && "Parent visited before children?");
2025  // Visit children first, then re-visit this node
2026  Worklist.push_back(Child);
2027  Worklist.push_back(Op1);
2028  Worklist.push_back(Child->getOperand(0));
2029  } else {
2030  // Back at this node after visiting the children
2031  if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2032  Imbalanced = true;
2033 
2034  NodeHeights[Child] = std::max(NodeHeights[Op1],
2035  NodeHeights[Child->getOperand(0)]) + 1;
2036  }
2037  }
2038  }
2039 
2040  LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2041  << " weight=" << CurrentWeight
2042  << " imbalanced=" << Imbalanced << "\n");
2043 
2044  // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2045  // This factors out a shift in order to match memw(a<<Y+b).
2046  if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2047  willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2048  LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2049  int Weight = Mul1.Weight + Mul2.Weight;
2050  int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2051  SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2052  SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2053  SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2054  Mul1Factored, Mul2Factored);
2055  SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2056  Mul1.Value.getValueType());
2057  SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2058  Sum, Const);
2059  NodeHeights[New] = Height;
2060  Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2061  } else if (Mul1.Value.getNode()) {
2062  // We failed to factorize two MULs, so now the Muls are left outside the
2063  // queue... add them back.
2064  Leaves.push(Mul1);
2065  if (Mul2.Value.getNode())
2066  Leaves.push(Mul2);
2067  CanFactorize = false;
2068  }
2069 
2070  // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2071  // and the root node itself is not used more than twice. This reduces the
2072  // amount of additional constant extenders introduced by this optimization.
2073  bool CombinedGA = false;
2074  if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2075  GA.Value.hasOneUse() && N->use_size() < 3) {
2076  GlobalAddressSDNode *GANode =
2077  cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2078  ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2079 
2080  if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2082  LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
2083  << Offset->getSExtValue() << "): ");
2084  LLVM_DEBUG(GANode->dump(CurDAG));
2085 
2086  SDValue NewTGA =
2087  CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2088  GANode->getValueType(0),
2089  GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2090  GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2091  GA.Value.getValueType(), NewTGA);
2092  GA.Weight += Leaves.top().Weight;
2093 
2094  NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2095  CombinedGA = true;
2096 
2097  Leaves.pop(); // Remove the offset constant from the queue
2098  }
2099  }
2100 
2101  if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2102  (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2103  RootWeights[N] = CurrentWeight;
2104  RootHeights[N] = NodeHeights[SDValue(N, 0)];
2105 
2106  return SDValue(N, 0);
2107  }
2108 
2109  // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2110  if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2111  WeightedLeaf SHL = Leaves.findSHL(31);
2112  if (SHL.Value.getNode()) {
2113  int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2114  GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2115  GA.Value.getValueType(),
2116  GA.Value, SHL.Value);
2117  GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2118  NodeHeights[GA.Value] = Height;
2119  }
2120  }
2121 
2122  if (GA.Value.getNode())
2123  Leaves.push(GA);
2124 
2125  // If this is the top level and we haven't factored out a shift, we should try
2126  // to move a constant to the bottom to match addressing modes like memw(rX+C)
2127  if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2128  LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2129  Leaves.pushToBottom(Leaves.pop());
2130  }
2131 
2132  const DataLayout &DL = CurDAG->getDataLayout();
2133  const TargetLowering &TLI = *getTargetLowering();
2134 
2135  // Rebuild the tree using Huffman's algorithm
2136  while (Leaves.size() > 1) {
2137  WeightedLeaf L0 = Leaves.pop();
2138 
2139  // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2140  // otherwise just get the next leaf
2141  WeightedLeaf L1 = Leaves.findMULbyConst();
2142  if (!L1.Value.getNode())
2143  L1 = Leaves.pop();
2144 
2145  assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2146 
2147  SDValue V0 = L0.Value;
2148  int V0Weight = L0.Weight;
2149  SDValue V1 = L1.Value;
2150  int V1Weight = L1.Weight;
2151 
2152  // Make sure that none of these nodes have been RAUW'd
2153  if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2154  (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2155  LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2156  return balanceSubTree(N, TopLevel);
2157  }
2158 
2161  EVT VT = N->getValueType(0);
2162  SDValue NewNode;
2163 
2164  if (V0C && !V1C) {
2165  std::swap(V0, V1);
2166  std::swap(V0C, V1C);
2167  }
2168 
2169  // Calculate height of this node
2170  assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2171  "Children must have been visited before re-combining them!");
2172  int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2173 
2174  // Rebuild this node (and restore SHL from MUL if needed)
2175  if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2176  NewNode = CurDAG->getNode(
2177  ISD::SHL, SDLoc(V0), VT, V0,
2179  V1C->getAPIntValue().logBase2(), SDLoc(N),
2180  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2181  else
2182  NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2183 
2184  NodeHeights[NewNode] = Height;
2185 
2186  int Weight = V0Weight + V1Weight;
2187  Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2188 
2189  LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
2190  << ",Height=" << Height << "):\n");
2191  LLVM_DEBUG(NewNode.dump());
2192  }
2193 
2194  assert(Leaves.size() == 1);
2195  SDValue NewRoot = Leaves.top().Value;
2196 
2197  assert(NodeHeights.count(NewRoot));
2198  int Height = NodeHeights[NewRoot];
2199 
2200  // Restore SHL if we earlier converted it to a MUL
2201  if (NewRoot.getOpcode() == ISD::MUL) {
2202  ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2203  if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2204  EVT VT = NewRoot.getValueType();
2205  SDValue V0 = NewRoot.getOperand(0);
2206  NewRoot = CurDAG->getNode(
2207  ISD::SHL, SDLoc(NewRoot), VT, V0,
2209  V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2210  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2211  }
2212  }
2213 
2214  if (N != NewRoot.getNode()) {
2215  LLVM_DEBUG(dbgs() << "--> Root is now: ");
2216  LLVM_DEBUG(NewRoot.dump());
2217 
2218  // Replace all uses of old root by new root
2219  CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2220  // Mark that we have RAUW'd N
2221  RootWeights[N] = -2;
2222  } else {
2223  LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
2224  }
2225 
2226  RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2227  RootHeights[NewRoot.getNode()] = Height;
2228 
2229  return NewRoot;
2230 }
2231 
2232 void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2233  for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) {
2234  SDNode *N = &*I++;
2235  if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2236  continue;
2237 
2238  SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2239  if (BasePtr.getOpcode() != ISD::ADD)
2240  continue;
2241 
2242  // We've already processed this node
2243  if (RootWeights.count(BasePtr.getNode()))
2244  continue;
2245 
2246  LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2247  LLVM_DEBUG(N->dump(CurDAG));
2248 
2249  // FindRoots
2250  SmallVector<SDNode *, 4> Worklist;
2251 
2252  Worklist.push_back(BasePtr.getOperand(0).getNode());
2253  Worklist.push_back(BasePtr.getOperand(1).getNode());
2254 
2255  while (!Worklist.empty()) {
2256  SDNode *N = Worklist.pop_back_val();
2257  unsigned Opcode = N->getOpcode();
2258 
2259  if (!isOpcodeHandled(N))
2260  continue;
2261 
2262  Worklist.push_back(N->getOperand(0).getNode());
2263  Worklist.push_back(N->getOperand(1).getNode());
2264 
2265  // Not a root if it has only one use and same opcode as its parent
2266  if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
2267  continue;
2268 
2269  // This root node has already been processed
2270  if (RootWeights.count(N))
2271  continue;
2272 
2273  RootWeights[N] = -1;
2274  }
2275 
2276  // Balance node itself
2277  RootWeights[BasePtr.getNode()] = -1;
2278  SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2279 
2280  if (N->getOpcode() == ISD::LOAD)
2281  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2282  NewBasePtr, N->getOperand(2));
2283  else
2284  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2285  NewBasePtr, N->getOperand(3));
2286 
2287  LLVM_DEBUG(dbgs() << "--> Final node: ");
2288  LLVM_DEBUG(N->dump(CurDAG));
2289  }
2290 
2292  GAUsesInFunction.clear();
2293  RootHeights.clear();
2294  RootWeights.clear();
2295 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
unsigned CreateReg(MVT VT, bool isDivergent=false)
CreateReg - Allocate a single virtual register for the given type.
uint64_t CallInst * C
static MVT getIntegerVT(unsigned BitWidth)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOffset() const
const GlobalValue * getGlobal() const
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static cl::opt< bool > CheckSingleUse("hexagon-isel-su", cl::Hidden, cl::init(true), cl::desc("Enable checking of SDNode's single-use status"))
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static bool isMemOPCandidate(SDNode *I, SDNode *U)
static bool willShiftRightEliminate(SDValue V, unsigned Amount)
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:391
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
const SDValue & getBasePtr() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool DetectUseSxtw(SDValue &N, SDValue &R)
const SDValue & getValue() const
SDVTList getVTList() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
bool SelectAddrFI(SDValue &N, SDValue &R)
const SDValue & getChain() const
static unsigned getPowerOf2Factor(SDValue Val)
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:130
unsigned getAlignment() const
Hexagon target-specific information for each MachineFunction.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:305
A debug info location.
Definition: DebugLoc.h:33
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:140
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
const HexagonFrameLowering * getFrameLowering() const override
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
MachineFunction * MF
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1508
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:158
bool isTruncatingStore() const
Return true if the op does a truncation before store.
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:119
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1631
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
static ManagedStatic< DebugCounter > DC
static cl::opt< bool > RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if this allows optimizations"))
Shift and rotation operations.
Definition: ISDOpcodes.h:434
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
const TargetLowering * TLI
static bool isTargetConstant(const SDValue &V)
uint64_t getConstantOperandVal(unsigned i) const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:303
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl)
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:463
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:403
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT&#39;s, which are used to parameterize some operations.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:452
unsigned getSizeInBits() const
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:477
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:400
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:634
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:582
bool SelectNewCircIntrinsic(SDNode *IntN)
Generate a machine instruction node for the new circlar buffer intrinsics.
static cl::opt< bool > EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), cl::desc("Rebalance address calculation trees to improve " "instruction selection"))
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:150
bool SelectBrevLdIntrinsic(SDNode *IntN)
SDNode * StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN)
This class is used to represent ISD::STORE nodes.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool SelectAnyImm0(SDValue &N, SDValue &R)
bool SelectAnyInt(SDValue &N, SDValue &R)
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:970
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions...
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
const SDValue & getOffset() const
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool isValidAutoIncImm(const EVT VT, const int Offset) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using &#39;From&#39; to use &#39;To&#39; instead.
const APInt & getAPIntValue() const
bool SelectAnyImm2(SDValue &N, SDValue &R)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:56
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Extended Value Type.
Definition: ValueTypes.h:33
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl)
const MachineBasicBlock & front() const
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:188
size_t size() const
Definition: SmallVector.h:52
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:209
static bool isOpcodeHandled(const SDNode *N)
void dump() const
Dump this node, for debugging.
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
bool SelectAddrGA(SDValue &N, SDValue &R)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1173
BlockVerifier::State From
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:448
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, uint32_t LogAlign)
unsigned estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
void dump() const
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1050
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const
EVT is not used in-tree, but is used by out-of-tree target.
An SDNode that represents everything that will be needed to construct a MachineInstr.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
CHAIN = SC CHAIN, Imm128 - System call.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Represents one node in the SelectionDAG.
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:440
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool SelectAnyImmediate(SDValue &N, SDValue &R, uint32_t LogAlign)
unsigned logBase2() const
Definition: APInt.h:1747
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
size_t use_size() const
Return the number of uses of this node.
EVT getMemoryVT() const
Return the type of the in-memory value.
Class for arbitrary precision integers.
Definition: APInt.h:69
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:444
bool isPowerOf2() const
Check if this APInt&#39;s value is a power of two greater than zero.
Definition: APInt.h:463
iterator_range< user_iterator > users()
Definition: Value.h:399
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:492
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:495
static cl::opt< bool > RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"))
bool SelectAnyImm3(SDValue &N, SDValue &R)
bool SelectAnyImm1(SDValue &N, SDValue &R)
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:441
unsigned getVectorLength() const
const SDValue & getValue() const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:411
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:711
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
bool SelectAddrGP(SDValue &N, SDValue &R)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:510
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:642
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1212
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
uint32_t Size
Definition: Profile.cpp:46
bool tryLoadOfLoadIntrinsic(LoadSDNode *N)
FunctionPass * createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel)
unsigned getOpcode() const
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
const TargetLowering * getTargetLowering() const
bool SelectAnyImm(SDValue &N, SDValue &R)
This class is used to form a handle around another node that is persistent and is updated across invo...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:72
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
unsigned countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:477
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:80
static const Function * getParent(const Value *V)
bool isNonTemporal() const
const APFloat & getValueAPF() const
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
APInt bitcastToAPInt() const
Definition: APFloat.h:1093
Conversion operators.
Definition: ISDOpcodes.h:489
const SDValue & getOperand(unsigned i) const
uint64_t getZExtValue() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
MachineSDNode * LoadInstrForLoadIntrinsic(SDNode *IntN)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand *> NewMemRefs)
Mutate the specified machine node&#39;s memory references to the provided list.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
FunctionLoweringInfo * FuncInfo
#define T1
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:628
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:950
This class is used to represent ISD::LOAD nodes.