LLVM 17.0.0git
HexagonISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the Hexagon target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "HexagonISelDAGToDAG.h"
14#include "Hexagon.h"
15#include "HexagonISelLowering.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsHexagon.h"
24#include "llvm/Support/Debug.h"
25using namespace llvm;
26
27#define DEBUG_TYPE "hexagon-isel"
28#define PASS_NAME "Hexagon DAG->DAG Pattern Instruction Selection"
29
30static
32EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
33 cl::desc("Rebalance address calculation trees to improve "
34 "instruction selection"));
35
36// Rebalance only if this allows e.g. combining a GA with an offset or
37// factoring out a shift.
38static
41 cl::desc("Rebalance address tree only if this allows optimizations"));
42
43static
46 cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
47
48static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
49 cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
50
51//===----------------------------------------------------------------------===//
52// Instruction Selector Implementation
53//===----------------------------------------------------------------------===//
54
55#define GET_DAGISEL_BODY HexagonDAGToDAGISel
56#include "HexagonGenDAGISel.inc"
57
58namespace llvm {
59/// createHexagonISelDag - This pass converts a legalized DAG into a
60/// Hexagon-specific DAG, ready for instruction scheduling.
62 CodeGenOpt::Level OptLevel) {
63 return new HexagonDAGToDAGISel(TM, OptLevel);
64}
65}
66
68
70
71void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
72 SDValue Chain = LD->getChain();
73 SDValue Base = LD->getBasePtr();
74 SDValue Offset = LD->getOffset();
75 int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
76 EVT LoadedVT = LD->getMemoryVT();
77 unsigned Opcode = 0;
78
79 // Check for zero extended loads. Treat any-extend loads as zero extended
80 // loads.
81 ISD::LoadExtType ExtType = LD->getExtensionType();
82 bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
83 bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
84
85 assert(LoadedVT.isSimple());
86 switch (LoadedVT.getSimpleVT().SimpleTy) {
87 case MVT::i8:
88 if (IsZeroExt)
89 Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
90 else
91 Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
92 break;
93 case MVT::i16:
94 if (IsZeroExt)
95 Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
96 else
97 Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
98 break;
99 case MVT::i32:
100 case MVT::f32:
101 case MVT::v2i16:
102 case MVT::v4i8:
103 Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
104 break;
105 case MVT::i64:
106 case MVT::f64:
107 case MVT::v2i32:
108 case MVT::v4i16:
109 case MVT::v8i8:
110 Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
111 break;
112 case MVT::v64i8:
113 case MVT::v32i16:
114 case MVT::v16i32:
115 case MVT::v8i64:
116 case MVT::v128i8:
117 case MVT::v64i16:
118 case MVT::v32i32:
119 case MVT::v16i64:
120 if (isAlignedMemNode(LD)) {
121 if (LD->isNonTemporal())
122 Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
123 else
124 Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
125 } else {
126 Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
127 }
128 break;
129 default:
130 llvm_unreachable("Unexpected memory type in indexed load");
131 }
132
133 SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
134 MachineMemOperand *MemOp = LD->getMemOperand();
135
136 auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
137 -> MachineSDNode* {
138 if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
139 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
140 return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
141 Zero, SDValue(N, 0));
142 }
143 if (ExtType == ISD::SEXTLOAD)
144 return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
145 SDValue(N, 0));
146 return N;
147 };
148
149 // Loaded value Next address Chain
150 SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
151 SDValue To[3];
152
153 EVT ValueVT = LD->getValueType(0);
154 if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
155 // A load extending to i64 will actually produce i32, which will then
156 // need to be extended to i64.
157 assert(LoadedVT.getSizeInBits() <= 32);
158 ValueVT = MVT::i32;
159 }
160
161 if (IsValidInc) {
162 MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
164 IncV, Chain);
165 CurDAG->setNodeMemRefs(L, {MemOp});
166 To[1] = SDValue(L, 1); // Next address.
167 To[2] = SDValue(L, 2); // Chain.
168 // Handle special case for extension to i64.
169 if (LD->getValueType(0) == MVT::i64)
170 L = getExt64(L, dl);
171 To[0] = SDValue(L, 0); // Loaded (extended) value.
172 } else {
173 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
174 MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
175 Base, Zero, Chain);
176 CurDAG->setNodeMemRefs(L, {MemOp});
177 To[2] = SDValue(L, 1); // Chain.
178 MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
179 Base, IncV);
180 To[1] = SDValue(A, 0); // Next address.
181 // Handle special case for extension to i64.
182 if (LD->getValueType(0) == MVT::i64)
183 L = getExt64(L, dl);
184 To[0] = SDValue(L, 0); // Loaded (extended) value.
185 }
186 ReplaceUses(From, To, 3);
187 CurDAG->RemoveDeadNode(LD);
188}
189
191 if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
192 return nullptr;
193
194 SDLoc dl(IntN);
195 unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
196
197 static std::map<unsigned,unsigned> LoadPciMap = {
198 { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci },
199 { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
200 { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci },
201 { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
202 { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci },
203 { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci },
204 };
205 auto FLC = LoadPciMap.find(IntNo);
206 if (FLC != LoadPciMap.end()) {
207 EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
208 EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
209 // Operands: { Base, Increment, Modifier, Chain }
210 auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
211 SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
212 MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
213 { IntN->getOperand(2), I, IntN->getOperand(4),
214 IntN->getOperand(0) });
215 return Res;
216 }
217
218 return nullptr;
219}
220
222 SDNode *IntN) {
223 // The "LoadN" is just a machine load instruction. The intrinsic also
224 // involves storing it. Generate an appropriate store to the location
225 // given in the intrinsic's operand(3).
226 uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
227 unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
229 unsigned Size = 1U << (SizeBits-1);
230
231 SDLoc dl(IntN);
233 SDValue TS;
234 SDValue Loc = IntN->getOperand(3);
235
236 if (Size >= 4)
237 TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
238 Align(Size));
239 else
240 TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
241 PI, MVT::getIntegerVT(Size * 8), Align(Size));
242
243 SDNode *StoreN;
244 {
245 HandleSDNode Handle(TS);
246 SelectStore(TS.getNode());
247 StoreN = Handle.getValue().getNode();
248 }
249
250 // Load's results are { Loaded value, Updated pointer, Chain }
251 ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
252 ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
253 return StoreN;
254}
255
257 // The intrinsics for load circ/brev perform two operations:
258 // 1. Load a value V from the specified location, using the addressing
259 // mode corresponding to the intrinsic.
260 // 2. Store V into a specified location. This location is typically a
261 // local, temporary object.
262 // In many cases, the program using these intrinsics will immediately
263 // load V again from the local object. In those cases, when certain
264 // conditions are met, the last load can be removed.
265 // This function identifies and optimizes this pattern. If the pattern
266 // cannot be optimized, it returns nullptr, which will cause the load
267 // to be selected separately from the intrinsic (which will be handled
268 // in SelectIntrinsicWChain).
269
270 SDValue Ch = N->getOperand(0);
271 SDValue Loc = N->getOperand(1);
272
273 // Assume that the load and the intrinsic are connected directly with a
274 // chain:
275 // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C
276 // t2: i32,ch = load t1:1, Loc, ...
277 SDNode *C = Ch.getNode();
278
279 if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
280 return false;
281
282 // The second load can only be eliminated if its extension type matches
283 // that of the load instruction corresponding to the intrinsic. The user
284 // can provide an address of an unsigned variable to store the result of
285 // a sign-extending intrinsic into (or the other way around).
286 ISD::LoadExtType IntExt;
287 switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
288 case Intrinsic::hexagon_circ_ldub:
289 case Intrinsic::hexagon_circ_lduh:
290 IntExt = ISD::ZEXTLOAD;
291 break;
292 case Intrinsic::hexagon_circ_ldw:
293 case Intrinsic::hexagon_circ_ldd:
294 IntExt = ISD::NON_EXTLOAD;
295 break;
296 default:
297 IntExt = ISD::SEXTLOAD;
298 break;
299 }
300 if (N->getExtensionType() != IntExt)
301 return false;
302
303 // Make sure the target location for the loaded value in the load intrinsic
304 // is the location from which LD (or N) is loading.
305 if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
306 return false;
307
310 SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
311 SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
312 ReplaceUses(F, T, std::size(T));
313 // This transformation will leave the intrinsic dead. If it remains in
314 // the DAG, the selection code will see it again, but without the load,
315 // and it will generate a store that is normally required for it.
317 return true;
318 }
319 return false;
320}
321
322// Convert the bit-reverse load intrinsic to appropriate target instruction.
324 if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
325 return false;
326
327 const SDLoc &dl(IntN);
328 unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
329
330 static const std::map<unsigned, unsigned> LoadBrevMap = {
331 { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
332 { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
333 { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
334 { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
335 { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
336 { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
337 };
338 auto FLI = LoadBrevMap.find(IntNo);
339 if (FLI != LoadBrevMap.end()) {
340 EVT ValTy =
341 (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
342 EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
343 // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
344 // modifier}.
345 // Operands of target instruction: { Base, Modifier, Chain }.
347 FLI->second, dl, RTys,
348 {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
349
350 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
351 CurDAG->setNodeMemRefs(Res, {MemOp});
352
353 ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
354 ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
355 ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
356 CurDAG->RemoveDeadNode(IntN);
357 return true;
358 }
359 return false;
360}
361
362/// Generate a machine instruction node for the new circular buffer intrinsics.
363/// The new versions use a CSx register instead of the K field.
365 if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
366 return false;
367
368 SDLoc DL(IntN);
369 unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
371
372 static std::map<unsigned,unsigned> LoadNPcMap = {
373 { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
374 { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
375 { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
376 { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
377 { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
378 { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
379 { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
380 { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
381 { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
382 { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
383 { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
384 { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
385 };
386 auto FLI = LoadNPcMap.find (IntNo);
387 if (FLI != LoadNPcMap.end()) {
388 EVT ValTy = MVT::i32;
389 if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
390 IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
391 ValTy = MVT::i64;
392 EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
393 // Handle load.*_pci case which has 6 operands.
394 if (IntN->getNumOperands() == 6) {
395 auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
396 SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
397 // Operands: { Base, Increment, Modifier, Start, Chain }.
398 Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
399 IntN->getOperand(0) };
400 } else
401 // Handle load.*_pcr case which has 5 operands.
402 // Operands: { Base, Modifier, Start, Chain }.
403 Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
404 IntN->getOperand(0) };
405 MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
406 ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
407 ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
408 ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
409 CurDAG->RemoveDeadNode(IntN);
410 return true;
411 }
412
413 static std::map<unsigned,unsigned> StoreNPcMap = {
414 { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
415 { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
416 { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
417 { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
418 { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
419 { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
420 { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
421 { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
422 { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
423 { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
424 };
425 auto FSI = StoreNPcMap.find (IntNo);
426 if (FSI != StoreNPcMap.end()) {
427 EVT RTys[] = { MVT::i32, MVT::Other };
428 // Handle store.*_pci case which has 7 operands.
429 if (IntN->getNumOperands() == 7) {
430 auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
431 SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
432 // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
433 Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
434 IntN->getOperand(6), IntN->getOperand(0) };
435 } else
436 // Handle store.*_pcr case which has 6 operands.
437 // Operands: { Base, Modifier, Value, Start, Chain }.
438 Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
439 IntN->getOperand(5), IntN->getOperand(0) };
440 MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
441 ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
442 ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
443 CurDAG->RemoveDeadNode(IntN);
444 return true;
445 }
446
447 return false;
448}
449
451 SDLoc dl(N);
452 LoadSDNode *LD = cast<LoadSDNode>(N);
453
454 // Handle indexed loads.
455 ISD::MemIndexedMode AM = LD->getAddressingMode();
456 if (AM != ISD::UNINDEXED) {
457 SelectIndexedLoad(LD, dl);
458 return;
459 }
460
461 // Handle patterns using circ/brev load intrinsics.
463 return;
464
465 SelectCode(LD);
466}
467
469 SDValue Chain = ST->getChain();
470 SDValue Base = ST->getBasePtr();
471 SDValue Offset = ST->getOffset();
472 SDValue Value = ST->getValue();
473 // Get the constant value.
474 int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
475 EVT StoredVT = ST->getMemoryVT();
476 EVT ValueVT = Value.getValueType();
477
478 bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
479 unsigned Opcode = 0;
480
481 assert(StoredVT.isSimple());
482 switch (StoredVT.getSimpleVT().SimpleTy) {
483 case MVT::i8:
484 Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
485 break;
486 case MVT::i16:
487 Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
488 break;
489 case MVT::i32:
490 case MVT::f32:
491 case MVT::v2i16:
492 case MVT::v4i8:
493 Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
494 break;
495 case MVT::i64:
496 case MVT::f64:
497 case MVT::v2i32:
498 case MVT::v4i16:
499 case MVT::v8i8:
500 Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
501 break;
502 case MVT::v64i8:
503 case MVT::v32i16:
504 case MVT::v16i32:
505 case MVT::v8i64:
506 case MVT::v128i8:
507 case MVT::v64i16:
508 case MVT::v32i32:
509 case MVT::v16i64:
510 if (isAlignedMemNode(ST)) {
511 if (ST->isNonTemporal())
512 Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
513 else
514 Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
515 } else {
516 Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
517 }
518 break;
519 default:
520 llvm_unreachable("Unexpected memory type in indexed store");
521 }
522
523 if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
524 assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
525 Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
526 dl, MVT::i32, Value);
527 }
528
529 SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
530 MachineMemOperand *MemOp = ST->getMemOperand();
531
532 // Next address Chain
533 SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
534 SDValue To[2];
535
536 if (IsValidInc) {
537 // Build post increment store.
538 SDValue Ops[] = { Base, IncV, Value, Chain };
540 Ops);
542 To[0] = SDValue(S, 0);
543 To[1] = SDValue(S, 1);
544 } else {
546 SDValue Ops[] = { Base, Zero, Value, Chain };
547 MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
549 To[1] = SDValue(S, 0);
550 MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
551 Base, IncV);
552 To[0] = SDValue(A, 0);
553 }
554
555 ReplaceUses(From, To, 2);
557}
558
560 SDLoc dl(N);
561 StoreSDNode *ST = cast<StoreSDNode>(N);
562
563 // Handle indexed stores.
564 ISD::MemIndexedMode AM = ST->getAddressingMode();
565 if (AM != ISD::UNINDEXED) {
566 SelectIndexedStore(ST, dl);
567 return;
568 }
569
570 SelectCode(ST);
571}
572
574 SDLoc dl(N);
575 SDValue Shl_0 = N->getOperand(0);
576 SDValue Shl_1 = N->getOperand(1);
577
578 auto Default = [this,N] () -> void { SelectCode(N); };
579
580 if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
581 return Default();
582
583 // RHS is const.
584 int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
585
586 if (Shl_0.getOpcode() == ISD::MUL) {
587 SDValue Mul_0 = Shl_0.getOperand(0); // Val
588 SDValue Mul_1 = Shl_0.getOperand(1); // Const
589 // RHS of mul is const.
590 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
591 int32_t ValConst = C->getSExtValue() << ShlConst;
592 if (isInt<9>(ValConst)) {
593 SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
594 SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
595 MVT::i32, Mul_0, Val);
596 ReplaceNode(N, Result);
597 return;
598 }
599 }
600 return Default();
601 }
602
603 if (Shl_0.getOpcode() == ISD::SUB) {
604 SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
605 SDValue Sub_1 = Shl_0.getOperand(1); // Val
606 if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
607 if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
608 return Default();
609 SDValue Shl2_0 = Sub_1.getOperand(0); // Val
610 SDValue Shl2_1 = Sub_1.getOperand(1); // Const
611 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
612 int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
613 if (isInt<9>(-ValConst)) {
614 SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
615 SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
616 MVT::i32, Shl2_0, Val);
617 ReplaceNode(N, Result);
618 return;
619 }
620 }
621 }
622 }
623
624 return Default();
625}
626
627//
628// Handling intrinsics for circular load and bitreverse load.
629//
634 return;
635 }
636
637 // Handle bit-reverse load intrinsics.
639 return;
640
642 return;
643
644 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
645 if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
646 IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
647 IntNo == Intrinsic::hexagon_V6_vgathermh ||
648 IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
649 IntNo == Intrinsic::hexagon_V6_vgathermhw ||
650 IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
652 return;
653 }
654 if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
655 IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
656 IntNo == Intrinsic::hexagon_V6_vgathermhq ||
657 IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
658 IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
659 IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
661 return;
662 }
663
664 SelectCode(N);
665}
666
668 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
669 unsigned Bits;
670 switch (IID) {
671 case Intrinsic::hexagon_S2_vsplatrb:
672 Bits = 8;
673 break;
674 case Intrinsic::hexagon_S2_vsplatrh:
675 Bits = 16;
676 break;
677 case Intrinsic::hexagon_V6_vaddcarry:
678 case Intrinsic::hexagon_V6_vaddcarry_128B:
679 case Intrinsic::hexagon_V6_vsubcarry:
680 case Intrinsic::hexagon_V6_vsubcarry_128B:
682 return;
683 default:
684 SelectCode(N);
685 return;
686 }
687
688 SDValue V = N->getOperand(1);
689 SDValue U;
690 // Splat intrinsics.
691 if (keepsLowBits(V, Bits, U)) {
692 SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
693 N->getOperand(0), U);
694 ReplaceNode(N, R.getNode());
695 SelectCode(R.getNode());
696 return;
697 }
698 SelectCode(N);
699}
700
702 SDValue Inp = N->getOperand(0);
703 MVT ResTy = N->getValueType(0).getSimpleVT();
704 auto IdxN = cast<ConstantSDNode>(N->getOperand(1));
705 unsigned Idx = IdxN->getZExtValue();
706
707 [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
708 [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
710 assert(2 * ResLen == InpTy.getVectorNumElements());
711 assert(ResTy.getSizeInBits() == 32);
712 assert(Idx == 0 || Idx == ResLen);
713
714 unsigned SubReg = Idx == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
715 SDValue Ext = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
716
717 ReplaceNode(N, Ext.getNode());
718}
719
720//
721// Map floating point constant values.
722//
724 SDLoc dl(N);
725 auto *CN = cast<ConstantFPSDNode>(N);
726 APInt A = CN->getValueAPF().bitcastToAPInt();
727 if (N->getValueType(0) == MVT::f32) {
728 SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
729 ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
730 return;
731 }
732 if (N->getValueType(0) == MVT::f64) {
733 SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
734 ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
735 return;
736 }
737
738 SelectCode(N);
739}
740
741//
742// Map boolean values.
743//
745 if (N->getValueType(0) == MVT::i1) {
746 assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1));
747 unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
748 ? Hexagon::PS_true
749 : Hexagon::PS_false;
751 return;
752 }
753
754 SelectCode(N);
755}
756
759 const HexagonFrameLowering *HFI = HST->getFrameLowering();
760 int FX = cast<FrameIndexSDNode>(N)->getIndex();
761 Align StkA = HFI->getStackAlign();
762 Align MaxA = MFI.getMaxAlign();
764 SDLoc DL(N);
766 SDNode *R = nullptr;
767
768 // Use PS_fi when:
769 // - the object is fixed, or
770 // - there are no objects with higher-than-default alignment, or
771 // - there are no dynamically allocated objects.
772 // Otherwise, use PS_fia.
773 if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
774 R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
775 } else {
776 auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
777 Register AR = HMFI.getStackAlignBaseReg();
779 SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
780 R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
781 }
782
783 ReplaceNode(N, R);
784}
785
787 unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
788 : Hexagon::A4_subp_c;
789 SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
790 { N->getOperand(0), N->getOperand(1),
791 N->getOperand(2) });
792 ReplaceNode(N, C);
793}
794
796 MVT ResTy = N->getValueType(0).getSimpleVT();
797 if (HST->isHVXVectorType(ResTy, true))
798 return SelectHvxVAlign(N);
799
800 const SDLoc &dl(N);
801 unsigned VecLen = ResTy.getSizeInBits();
802 if (VecLen == 32) {
803 SDValue Ops[] = {
804 CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
805 N->getOperand(0),
806 CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
807 N->getOperand(1),
808 CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
809 };
810 SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
811 MVT::i64, Ops);
812
813 // Shift right by "(Addr & 0x3) * 8" bytes.
814 SDNode *C;
817 if (HST->useCompound()) {
818 C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
819 M0, N->getOperand(2), M1);
820 } else {
821 SDNode *T = CurDAG->getMachineNode(Hexagon::S2_asl_i_r, dl, MVT::i32,
822 N->getOperand(2), M1);
823 C = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
824 SDValue(T, 0), M0);
825 }
826 SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
827 SDValue(R, 0), SDValue(C, 0));
828 SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
829 SDValue(S, 0));
830 ReplaceNode(N, E.getNode());
831 } else {
832 assert(VecLen == 64);
833 SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
834 N->getOperand(2));
835 SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
836 N->getOperand(0), N->getOperand(1),
837 SDValue(Pu,0));
838 ReplaceNode(N, VA);
839 }
840}
841
843 const SDLoc &dl(N);
844 SDValue A = N->getOperand(1);
845 int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
846 assert(isPowerOf2_32(-Mask));
847
849 SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
850 N->getOperand(0), M);
851 ReplaceNode(N, AA);
852}
853
854// Handle these nodes here to avoid having to write patterns for all
855// combinations of input/output types. In all cases, the resulting
856// instruction is the same.
858 SDValue Op = N->getOperand(0);
859 MVT OpTy = Op.getValueType().getSimpleVT();
860 SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
861 CurDAG->getVTList(OpTy), {Op});
862 ReplaceNode(T, Op.getNode());
863}
864
866 MVT ResTy = N->getValueType(0).getSimpleVT();
867 SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
868 N->getOperand(0));
869 ReplaceNode(N, T);
870}
871
873 const SDLoc &dl(N);
874 MVT ResTy = N->getValueType(0).getSimpleVT();
876 SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
877 N->getOperand(0), Zero);
878 ReplaceNode(N, T);
879}
880
882 const SDLoc &dl(N);
883 MVT ResTy = N->getValueType(0).getSimpleVT();
884 // The argument to V2Q should be a single vector.
885 MVT OpTy = N->getOperand(0).getValueType().getSimpleVT(); (void)OpTy;
886 assert(HST->getVectorLength() * 8 == OpTy.getSizeInBits());
887
889 SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
890 SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
891 N->getOperand(0), SDValue(R,0));
892 ReplaceNode(N, T);
893}
894
896 const SDLoc &dl(N);
897 MVT ResTy = N->getValueType(0).getSimpleVT();
898 // The result of V2Q should be a single vector.
899 assert(HST->getVectorLength() * 8 == ResTy.getSizeInBits());
900
902 SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
903 SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
904 N->getOperand(0), SDValue(R,0));
905 ReplaceNode(N, T);
906}
907
909 if (N->isMachineOpcode())
910 return N->setNodeId(-1); // Already selected.
911
912 auto isHvxOp = [this](SDNode *N) {
913 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
914 if (HST->isHVXVectorType(N->getValueType(i), true))
915 return true;
916 }
917 for (SDValue I : N->ops()) {
918 if (HST->isHVXVectorType(I.getValueType(), true))
919 return true;
920 }
921 return false;
922 };
923
924 if (HST->useHVXOps() && isHvxOp(N)) {
925 switch (N->getOpcode()) {
926 case ISD::EXTRACT_SUBVECTOR: return SelectHvxExtractSubvector(N);
927 case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N);
928
929 case HexagonISD::VROR: return SelectHvxRor(N);
930 }
931 }
932
933 switch (N->getOpcode()) {
934 case ISD::Constant: return SelectConstant(N);
935 case ISD::ConstantFP: return SelectConstantFP(N);
936 case ISD::FrameIndex: return SelectFrameIndex(N);
937 case ISD::SHL: return SelectSHL(N);
938 case ISD::LOAD: return SelectLoad(N);
939 case ISD::STORE: return SelectStore(N);
943
944 case HexagonISD::ADDC:
946 case HexagonISD::VALIGN: return SelectVAlign(N);
949 case HexagonISD::P2D: return SelectP2D(N);
950 case HexagonISD::D2P: return SelectD2P(N);
951 case HexagonISD::Q2V: return SelectQ2V(N);
952 case HexagonISD::V2Q: return SelectV2Q(N);
953 }
954
955 SelectCode(N);
956}
957
959SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
960 std::vector<SDValue> &OutOps) {
961 SDValue Inp = Op, Res;
962
963 switch (ConstraintID) {
964 default:
965 return true;
966 case InlineAsm::Constraint_o: // Offsetable.
967 case InlineAsm::Constraint_v: // Not offsetable.
968 case InlineAsm::Constraint_m: // Memory.
969 if (SelectAddrFI(Inp, Res))
970 OutOps.push_back(Res);
971 else
972 OutOps.push_back(Inp);
973 break;
974 }
975
976 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
977 return false;
978}
979
980
981static bool isMemOPCandidate(SDNode *I, SDNode *U) {
982 // I is an operand of U. Check if U is an arithmetic (binary) operation
983 // usable in a memop, where the other operand is a loaded value, and the
984 // result of U is stored in the same location.
985
986 if (!U->hasOneUse())
987 return false;
988 unsigned Opc = U->getOpcode();
989 switch (Opc) {
990 case ISD::ADD:
991 case ISD::SUB:
992 case ISD::AND:
993 case ISD::OR:
994 break;
995 default:
996 return false;
997 }
998
999 SDValue S0 = U->getOperand(0);
1000 SDValue S1 = U->getOperand(1);
1001 SDValue SY = (S0.getNode() == I) ? S1 : S0;
1002
1003 SDNode *UUse = *U->use_begin();
1004 if (UUse->getNumValues() != 1)
1005 return false;
1006
1007 // Check if one of the inputs to U is a load instruction and the output
1008 // is used by a store instruction. If so and they also have the same
1009 // base pointer, then don't preoprocess this node sequence as it
1010 // can be matched to a memop.
1011 SDNode *SYNode = SY.getNode();
1012 if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
1013 SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
1014 SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
1015 if (LDBasePtr == STBasePtr)
1016 return true;
1017 }
1018 return false;
1019}
1020
1021
1022// Transform: (or (select c x 0) z) -> (select c (or x z) z)
1023// (or (select c 0 y) z) -> (select c z (or y z))
1024void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
1025 SelectionDAG &DAG = *CurDAG;
1026
1027 for (auto *I : Nodes) {
1028 if (I->getOpcode() != ISD::OR)
1029 continue;
1030
1031 auto IsZero = [] (const SDValue &V) -> bool {
1032 if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
1033 return SC->isZero();
1034 return false;
1035 };
1036 auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
1037 if (Op.getOpcode() != ISD::SELECT)
1038 return false;
1039 return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
1040 };
1041
1042 SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
1043 EVT VT = I->getValueType(0);
1044 bool SelN0 = IsSelect0(N0);
1045 SDValue SOp = SelN0 ? N0 : N1;
1046 SDValue VOp = SelN0 ? N1 : N0;
1047
1048 if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
1049 SDValue SC = SOp.getOperand(0);
1050 SDValue SX = SOp.getOperand(1);
1051 SDValue SY = SOp.getOperand(2);
1052 SDLoc DLS = SOp;
1053 if (IsZero(SY)) {
1054 SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1055 SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1056 DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1057 } else if (IsZero(SX)) {
1058 SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1059 SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1060 DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1061 }
1062 }
1063 }
1064}
1065
1066// Transform: (store ch val (add x (add (shl y c) e)))
1067// to: (store ch val (add x (shl (add y d) c))),
1068// where e = (shl d c) for some integer d.
1069// The purpose of this is to enable generation of loads/stores with
1070// shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1071// value c must be 0, 1 or 2.
1072void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1073 SelectionDAG &DAG = *CurDAG;
1074
1075 for (auto *I : Nodes) {
1076 if (I->getOpcode() != ISD::STORE)
1077 continue;
1078
1079 // I matched: (store ch val Off)
1080 SDValue Off = I->getOperand(2);
1081 // Off needs to match: (add x (add (shl y c) (shl d c))))
1082 if (Off.getOpcode() != ISD::ADD)
1083 continue;
1084 // Off matched: (add x T0)
1085 SDValue T0 = Off.getOperand(1);
1086 // T0 needs to match: (add T1 T2):
1087 if (T0.getOpcode() != ISD::ADD)
1088 continue;
1089 // T0 matched: (add T1 T2)
1090 SDValue T1 = T0.getOperand(0);
1091 SDValue T2 = T0.getOperand(1);
1092 // T1 needs to match: (shl y c)
1093 if (T1.getOpcode() != ISD::SHL)
1094 continue;
1095 SDValue C = T1.getOperand(1);
1096 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(C.getNode());
1097 if (CN == nullptr)
1098 continue;
1099 unsigned CV = CN->getZExtValue();
1100 if (CV > 2)
1101 continue;
1102 // T2 needs to match e, where e = (shl d c) for some d.
1103 ConstantSDNode *EN = dyn_cast<ConstantSDNode>(T2.getNode());
1104 if (EN == nullptr)
1105 continue;
1106 unsigned EV = EN->getZExtValue();
1107 if (EV % (1 << CV) != 0)
1108 continue;
1109 unsigned DV = EV / (1 << CV);
1110
1111 // Replace T0 with: (shl (add y d) c)
1112 SDLoc DL = SDLoc(I);
1113 EVT VT = T0.getValueType();
1114 SDValue D = DAG.getConstant(DV, DL, VT);
1115 // NewAdd = (add y d)
1116 SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1117 // NewShl = (shl NewAdd c)
1118 SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1119 ReplaceNode(T0.getNode(), NewShl.getNode());
1120 }
1121}
1122
1123// Transform: (load ch (add x (and (srl y c) Mask)))
1124// to: (load ch (add x (shl (srl y d) d-c)))
1125// where
1126// Mask = 00..0 111..1 0.0
1127// | | +-- d-c 0s, and d-c is 0, 1 or 2.
1128// | +-------- 1s
1129// +-------------- at most c 0s
1130// Motivating example:
1131// DAG combiner optimizes (add x (shl (srl y 5) 2))
1132// to (add x (and (srl y 3) 1FFFFFFC))
1133// which results in a constant-extended and(##...,lsr). This transformation
1134// undoes this simplification for cases where the shl can be folded into
1135// an addressing mode.
1136void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1137 SelectionDAG &DAG = *CurDAG;
1138
1139 for (SDNode *N : Nodes) {
1140 unsigned Opc = N->getOpcode();
1141 if (Opc != ISD::LOAD && Opc != ISD::STORE)
1142 continue;
1143 SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1144 // Addr must match: (add x T0)
1145 if (Addr.getOpcode() != ISD::ADD)
1146 continue;
1147 SDValue T0 = Addr.getOperand(1);
1148 // T0 must match: (and T1 Mask)
1149 if (T0.getOpcode() != ISD::AND)
1150 continue;
1151
1152 // We have an AND.
1153 //
1154 // Check the first operand. It must be: (srl y c).
1155 SDValue S = T0.getOperand(0);
1156 if (S.getOpcode() != ISD::SRL)
1157 continue;
1158 ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode());
1159 if (SN == nullptr)
1160 continue;
1161 if (SN->getAPIntValue().getBitWidth() != 32)
1162 continue;
1163 uint32_t CV = SN->getZExtValue();
1164
1165 // Check the second operand: the supposed mask.
1166 ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode());
1167 if (MN == nullptr)
1168 continue;
1169 if (MN->getAPIntValue().getBitWidth() != 32)
1170 continue;
1171 uint32_t Mask = MN->getZExtValue();
1172 // Examine the mask.
1173 uint32_t TZ = llvm::countr_zero(Mask);
1174 uint32_t M1 = llvm::countr_one(Mask >> TZ);
1175 uint32_t LZ = llvm::countl_zero(Mask);
1176 // Trailing zeros + middle ones + leading zeros must equal the width.
1177 if (TZ + M1 + LZ != 32)
1178 continue;
1179 // The number of trailing zeros will be encoded in the addressing mode.
1180 if (TZ > 2)
1181 continue;
1182 // The number of leading zeros must be at most c.
1183 if (LZ > CV)
1184 continue;
1185
1186 // All looks good.
1187 SDValue Y = S.getOperand(0);
1188 EVT VT = Addr.getValueType();
1189 SDLoc dl(S);
1190 // TZ = D-C, so D = TZ+C.
1191 SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1192 SDValue DC = DAG.getConstant(TZ, dl, VT);
1193 SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1194 SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1195 ReplaceNode(T0.getNode(), NewShl.getNode());
1196 }
1197}
1198
1199// Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1200// (op ... 1 ...))
1201void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1202 SelectionDAG &DAG = *CurDAG;
1203
1204 for (SDNode *N : Nodes) {
1205 unsigned Opc = N->getOpcode();
1206 if (Opc != ISD::ZERO_EXTEND)
1207 continue;
1208 SDValue OpI1 = N->getOperand(0);
1209 EVT OpVT = OpI1.getValueType();
1210 if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1211 continue;
1212 for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
1213 SDNode *U = *I;
1214 if (U->getNumValues() != 1)
1215 continue;
1216 EVT UVT = U->getValueType(0);
1217 if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1218 continue;
1219 // Do not generate select for all i1 vector type.
1220 if (UVT.isVector() && UVT.getVectorElementType() == MVT::i1)
1221 continue;
1222 if (isMemOPCandidate(N, U))
1223 continue;
1224
1225 // Potentially simplifiable operation.
1226 unsigned I1N = I.getOperandNo();
1227 SmallVector<SDValue,2> Ops(U->getNumOperands());
1228 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1229 Ops[i] = U->getOperand(i);
1230 EVT BVT = Ops[I1N].getValueType();
1231
1232 const SDLoc &dl(U);
1233 SDValue C0 = DAG.getConstant(0, dl, BVT);
1234 SDValue C1 = DAG.getConstant(1, dl, BVT);
1235 SDValue If0, If1;
1236
1237 if (isa<MachineSDNode>(U)) {
1238 unsigned UseOpc = U->getMachineOpcode();
1239 Ops[I1N] = C0;
1240 If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1241 Ops[I1N] = C1;
1242 If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1243 } else {
1244 unsigned UseOpc = U->getOpcode();
1245 Ops[I1N] = C0;
1246 If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1247 Ops[I1N] = C1;
1248 If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1249 }
1250 // We're generating a SELECT way after legalization, so keep the types
1251 // simple.
1252 unsigned UW = UVT.getSizeInBits();
1253 EVT SVT = (UW == 32 || UW == 64) ? MVT::getIntegerVT(UW) : UVT;
1254 SDValue Sel = DAG.getNode(ISD::SELECT, dl, SVT, OpI1,
1255 DAG.getBitcast(SVT, If1),
1256 DAG.getBitcast(SVT, If0));
1257 SDValue Ret = DAG.getBitcast(UVT, Sel);
1258 DAG.ReplaceAllUsesWith(U, Ret.getNode());
1259 }
1260 }
1261}
1262
1264 // Repack all nodes before calling each preprocessing function,
1265 // because each of them can modify the set of nodes.
1266 auto getNodes = [this]() -> std::vector<SDNode *> {
1267 std::vector<SDNode *> T;
1268 T.reserve(CurDAG->allnodes_size());
1269 for (SDNode &N : CurDAG->allnodes())
1270 T.push_back(&N);
1271 return T;
1272 };
1273
1274 if (HST->useHVXOps())
1275 PreprocessHvxISelDAG();
1276
1277 // Transform: (or (select c x 0) z) -> (select c (or x z) z)
1278 // (or (select c 0 y) z) -> (select c z (or y z))
1279 ppSimplifyOrSelect0(getNodes());
1280
1281 // Transform: (store ch val (add x (add (shl y c) e)))
1282 // to: (store ch val (add x (shl (add y d) c))),
1283 // where e = (shl d c) for some integer d.
1284 // The purpose of this is to enable generation of loads/stores with
1285 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1286 // value c must be 0, 1 or 2.
1287 ppAddrReorderAddShl(getNodes());
1288
1289 // Transform: (load ch (add x (and (srl y c) Mask)))
1290 // to: (load ch (add x (shl (srl y d) d-c)))
1291 // where
1292 // Mask = 00..0 111..1 0.0
1293 // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1294 // | +-------- 1s
1295 // +-------------- at most c 0s
1296 // Motivating example:
1297 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1298 // to (add x (and (srl y 3) 1FFFFFFC))
1299 // which results in a constant-extended and(##...,lsr). This transformation
1300 // undoes this simplification for cases where the shl can be folded into
1301 // an addressing mode.
1302 ppAddrRewriteAndSrl(getNodes());
1303
1304 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1305 // (op ... 1 ...))
1306 ppHoistZextI1(getNodes());
1307
1308 DEBUG_WITH_TYPE("isel", {
1309 dbgs() << "Preprocessed (Hexagon) selection DAG:";
1310 CurDAG->dump();
1311 });
1312
1314 rebalanceAddressTrees();
1315
1316 DEBUG_WITH_TYPE("isel", {
1317 dbgs() << "Address tree balanced selection DAG:";
1318 CurDAG->dump();
1319 });
1320 }
1321}
1322
1324 auto &HST = MF->getSubtarget<HexagonSubtarget>();
1325 auto &HFI = *HST.getFrameLowering();
1326 if (!HFI.needsAligna(*MF))
1327 return;
1328
1330 MachineBasicBlock *EntryBB = &MF->front();
1331 Align EntryMaxA = MFI.getMaxAlign();
1332
1333 // Reserve the first non-volatile register.
1334 Register AP = 0;
1335 auto &HRI = *HST.getRegisterInfo();
1337 for (const MCPhysReg *R = HRI.getCalleeSavedRegs(MF); *R; ++R) {
1338 if (Reserved[*R])
1339 continue;
1340 AP = *R;
1341 break;
1342 }
1343 assert(AP.isValid() && "Couldn't reserve stack align register");
1344 BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AP)
1345 .addImm(EntryMaxA.value());
1346 MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseReg(AP);
1347}
1348
1349void HexagonDAGToDAGISel::updateAligna() {
1350 auto &HFI = *MF->getSubtarget<HexagonSubtarget>().getFrameLowering();
1351 if (!HFI.needsAligna(*MF))
1352 return;
1353 auto *AlignaI = const_cast<MachineInstr*>(HFI.getAlignaInstr(*MF));
1354 assert(AlignaI != nullptr);
1355 unsigned MaxA = MF->getFrameInfo().getMaxAlign().value();
1356 if (AlignaI->getOperand(1).getImm() < MaxA)
1357 AlignaI->getOperand(1).setImm(MaxA);
1358}
1359
1360// Match a frame index that can be used in an addressing mode.
1362 if (N.getOpcode() != ISD::FrameIndex)
1363 return false;
1364 auto &HFI = *HST->getFrameLowering();
1366 int FX = cast<FrameIndexSDNode>(N)->getIndex();
1367 if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1368 return false;
1370 return true;
1371}
1372
1374 return SelectGlobalAddress(N, R, false, Align(1));
1375}
1376
1378 return SelectGlobalAddress(N, R, true, Align(1));
1379}
1380
1382 return SelectAnyImmediate(N, R, Align(1));
1383}
1384
1386 return SelectAnyImmediate(N, R, Align(1));
1387}
1389 return SelectAnyImmediate(N, R, Align(2));
1390}
1392 return SelectAnyImmediate(N, R, Align(4));
1393}
1395 return SelectAnyImmediate(N, R, Align(8));
1396}
1397
1399 EVT T = N.getValueType();
1400 if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1401 return false;
1402 int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1403 R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1404 return true;
1405}
1406
1408 Align Alignment) {
1409 switch (N.getOpcode()) {
1410 case ISD::Constant: {
1411 if (N.getValueType() != MVT::i32)
1412 return false;
1413 int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1414 if (!isAligned(Alignment, V))
1415 return false;
1416 R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1417 return true;
1418 }
1419 case HexagonISD::JT:
1420 case HexagonISD::CP:
1421 // These are assumed to always be aligned at least 8-byte boundary.
1422 if (Alignment > Align(8))
1423 return false;
1424 R = N.getOperand(0);
1425 return true;
1427 // Symbols may be aligned at any boundary.
1428 if (Alignment > Align(1))
1429 return false;
1430 R = N;
1431 return true;
1432 case ISD::BlockAddress:
1433 // Block address is always aligned at least 4-byte boundary.
1434 if (Alignment > Align(4) ||
1435 !isAligned(Alignment, cast<BlockAddressSDNode>(N)->getOffset()))
1436 return false;
1437 R = N;
1438 return true;
1439 }
1440
1441 if (SelectGlobalAddress(N, R, false, Alignment) ||
1442 SelectGlobalAddress(N, R, true, Alignment))
1443 return true;
1444
1445 return false;
1446}
1447
1449 bool UseGP, Align Alignment) {
1450 switch (N.getOpcode()) {
1451 case ISD::ADD: {
1452 SDValue N0 = N.getOperand(0);
1453 SDValue N1 = N.getOperand(1);
1454 unsigned GAOpc = N0.getOpcode();
1455 if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1456 return false;
1457 if (!UseGP && GAOpc != HexagonISD::CONST32)
1458 return false;
1459 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1460 if (!isAligned(Alignment, Const->getZExtValue()))
1461 return false;
1462 SDValue Addr = N0.getOperand(0);
1463 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1464 if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1465 uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1466 R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1467 N.getValueType(), NewOff);
1468 return true;
1469 }
1470 }
1471 }
1472 break;
1473 }
1474 case HexagonISD::CP:
1475 case HexagonISD::JT:
1477 // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1478 // want in the instruction.
1479 if (!UseGP)
1480 R = N.getOperand(0);
1481 return !UseGP;
1483 if (UseGP)
1484 R = N.getOperand(0);
1485 return UseGP;
1486 default:
1487 return false;
1488 }
1489
1490 return false;
1491}
1492
1494 // This (complex pattern) function is meant to detect a sign-extension
1495 // i32->i64 on a per-operand basis. This would allow writing single
1496 // patterns that would cover a number of combinations of different ways
1497 // a sign-extensions could be written. For example:
1498 // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1499 // could match either one of these:
1500 // (mul (sext x) (sext_inreg y))
1501 // (mul (sext-load *p) (sext_inreg y))
1502 // (mul (sext_inreg x) (sext y))
1503 // etc.
1504 //
1505 // The returned value will have type i64 and its low word will
1506 // contain the value being extended. The high bits are not specified.
1507 // The returned type is i64 because the original type of N was i64,
1508 // but the users of this function should only use the low-word of the
1509 // result, e.g.
1510 // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1511
1512 if (N.getValueType() != MVT::i64)
1513 return false;
1514 unsigned Opc = N.getOpcode();
1515 switch (Opc) {
1516 case ISD::SIGN_EXTEND:
1518 // sext_inreg has the source type as a separate operand.
1519 EVT T = Opc == ISD::SIGN_EXTEND
1520 ? N.getOperand(0).getValueType()
1521 : cast<VTSDNode>(N.getOperand(1))->getVT();
1522 unsigned SW = T.getSizeInBits();
1523 if (SW == 32)
1524 R = N.getOperand(0);
1525 else if (SW < 32)
1526 R = N;
1527 else
1528 return false;
1529 break;
1530 }
1531 case ISD::LOAD: {
1532 LoadSDNode *L = cast<LoadSDNode>(N);
1533 if (L->getExtensionType() != ISD::SEXTLOAD)
1534 return false;
1535 // All extending loads extend to i32, so even if the value in
1536 // memory is shorter than 32 bits, it will be i32 after the load.
1537 if (L->getMemoryVT().getSizeInBits() > 32)
1538 return false;
1539 R = N;
1540 break;
1541 }
1542 case ISD::SRA: {
1543 auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1544 if (!S || S->getZExtValue() != 32)
1545 return false;
1546 R = N;
1547 break;
1548 }
1549 default:
1550 return false;
1551 }
1552 EVT RT = R.getValueType();
1553 if (RT == MVT::i64)
1554 return true;
1555 assert(RT == MVT::i32);
1556 // This is only to produce a value of type i64. Do not rely on the
1557 // high bits produced by this.
1558 const SDLoc &dl(N);
1559 SDValue Ops[] = {
1560 CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1561 R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1562 R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1563 };
1564 SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1565 MVT::i64, Ops);
1566 R = SDValue(T, 0);
1567 return true;
1568}
1569
1570bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1571 SDValue &Src) {
1572 unsigned Opc = Val.getOpcode();
1573 switch (Opc) {
1574 case ISD::SIGN_EXTEND:
1575 case ISD::ZERO_EXTEND:
1576 case ISD::ANY_EXTEND: {
1577 const SDValue &Op0 = Val.getOperand(0);
1578 EVT T = Op0.getValueType();
1579 if (T.isInteger() && T.getSizeInBits() == NumBits) {
1580 Src = Op0;
1581 return true;
1582 }
1583 break;
1584 }
1586 case ISD::AssertSext:
1587 case ISD::AssertZext:
1588 if (Val.getOperand(0).getValueType().isInteger()) {
1589 VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1590 if (T->getVT().getSizeInBits() == NumBits) {
1591 Src = Val.getOperand(0);
1592 return true;
1593 }
1594 }
1595 break;
1596 case ISD::AND: {
1597 // Check if this is an AND with NumBits of lower bits set to 1.
1598 uint64_t Mask = (1ULL << NumBits) - 1;
1599 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1600 if (C->getZExtValue() == Mask) {
1601 Src = Val.getOperand(1);
1602 return true;
1603 }
1604 }
1605 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1606 if (C->getZExtValue() == Mask) {
1607 Src = Val.getOperand(0);
1608 return true;
1609 }
1610 }
1611 break;
1612 }
1613 case ISD::OR:
1614 case ISD::XOR: {
1615 // OR/XOR with the lower NumBits bits set to 0.
1616 uint64_t Mask = (1ULL << NumBits) - 1;
1617 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1618 if ((C->getZExtValue() & Mask) == 0) {
1619 Src = Val.getOperand(1);
1620 return true;
1621 }
1622 }
1623 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1624 if ((C->getZExtValue() & Mask) == 0) {
1625 Src = Val.getOperand(0);
1626 return true;
1627 }
1628 }
1629 break;
1630 }
1631 default:
1632 break;
1633 }
1634 return false;
1635}
1636
1637bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1638 return N->getAlign().value() >= N->getMemoryVT().getStoreSize();
1639}
1640
1641bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1642 unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1643 switch (N->getMemoryVT().getStoreSize()) {
1644 case 1:
1645 return StackSize <= 56; // 1*2^6 - 8
1646 case 2:
1647 return StackSize <= 120; // 2*2^6 - 8
1648 case 4:
1649 return StackSize <= 248; // 4*2^6 - 8
1650 default:
1651 return false;
1652 }
1653}
1654
1655// Return true when the given node fits in a positive half word.
1656bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1657 if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1658 int64_t V = CN->getSExtValue();
1659 return V > 0 && isInt<16>(V);
1660 }
1661 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1662 const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1663 return VN->getVT().getSizeInBits() <= 16;
1664 }
1665 return false;
1666}
1667
1668bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1669 return !CheckSingleUse || N->hasOneUse();
1670}
1671
1672////////////////////////////////////////////////////////////////////////////////
1673// Rebalancing of address calculation trees
1674
1675static bool isOpcodeHandled(const SDNode *N) {
1676 switch (N->getOpcode()) {
1677 case ISD::ADD:
1678 case ISD::MUL:
1679 return true;
1680 case ISD::SHL:
1681 // We only handle constant shifts because these can be easily flattened
1682 // into multiplications by 2^Op1.
1683 return isa<ConstantSDNode>(N->getOperand(1).getNode());
1684 default:
1685 return false;
1686 }
1687}
1688
1689/// Return the weight of an SDNode
1690int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1691 if (!isOpcodeHandled(N))
1692 return 1;
1693 assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1694 assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1695 assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1696 return RootWeights[N];
1697}
1698
1699int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1700 if (!isOpcodeHandled(N))
1701 return 0;
1702 assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1703 "Cannot query height of unvisited/RAUW'd node!");
1704 return RootHeights[N];
1705}
1706
1707namespace {
1708struct WeightedLeaf {
1709 SDValue Value;
1710 int Weight;
1711 int InsertionOrder;
1712
1713 WeightedLeaf() {}
1714
1715 WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1716 Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1717 assert(Weight >= 0 && "Weight must be >= 0");
1718 }
1719
1720 static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1721 assert(A.Value.getNode() && B.Value.getNode());
1722 return A.Weight == B.Weight ?
1723 (A.InsertionOrder > B.InsertionOrder) :
1724 (A.Weight > B.Weight);
1725 }
1726};
1727
1728/// A specialized priority queue for WeigthedLeaves. It automatically folds
1729/// constants and allows removal of non-top elements while maintaining the
1730/// priority order.
1731class LeafPrioQueue {
1733 bool HaveConst;
1734 WeightedLeaf ConstElt;
1735 unsigned Opcode;
1736
1737public:
1738 bool empty() {
1739 return (!HaveConst && Q.empty());
1740 }
1741
1742 size_t size() {
1743 return Q.size() + HaveConst;
1744 }
1745
1746 bool hasConst() {
1747 return HaveConst;
1748 }
1749
1750 const WeightedLeaf &top() {
1751 if (HaveConst)
1752 return ConstElt;
1753 return Q.front();
1754 }
1755
1756 WeightedLeaf pop() {
1757 if (HaveConst) {
1758 HaveConst = false;
1759 return ConstElt;
1760 }
1761 std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1762 return Q.pop_back_val();
1763 }
1764
1765 void push(WeightedLeaf L, bool SeparateConst=true) {
1766 if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1767 if (Opcode == ISD::MUL &&
1768 cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1769 return;
1770 if (Opcode == ISD::ADD &&
1771 cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1772 return;
1773
1774 HaveConst = true;
1775 ConstElt = L;
1776 } else {
1777 Q.push_back(L);
1778 std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1779 }
1780 }
1781
1782 /// Push L to the bottom of the queue regardless of its weight. If L is
1783 /// constant, it will not be folded with other constants in the queue.
1784 void pushToBottom(WeightedLeaf L) {
1785 L.Weight = 1000;
1786 push(L, false);
1787 }
1788
1789 /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1790 /// lowest weight and remove it from the queue.
1791 WeightedLeaf findSHL(uint64_t MaxAmount);
1792
1793 WeightedLeaf findMULbyConst();
1794
1795 LeafPrioQueue(unsigned Opcode) :
1796 HaveConst(false), Opcode(Opcode) { }
1797};
1798} // end anonymous namespace
1799
1800WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1801 int ResultPos;
1802 WeightedLeaf Result;
1803
1804 for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1805 const WeightedLeaf &L = Q[Pos];
1806 const SDValue &Val = L.Value;
1807 if (Val.getOpcode() != ISD::SHL ||
1808 !isa<ConstantSDNode>(Val.getOperand(1)) ||
1809 Val.getConstantOperandVal(1) > MaxAmount)
1810 continue;
1811 if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1812 (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1813 {
1814 Result = L;
1815 ResultPos = Pos;
1816 }
1817 }
1818
1819 if (Result.Value.getNode()) {
1820 Q.erase(&Q[ResultPos]);
1821 std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1822 }
1823
1824 return Result;
1825}
1826
1827WeightedLeaf LeafPrioQueue::findMULbyConst() {
1828 int ResultPos;
1829 WeightedLeaf Result;
1830
1831 for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1832 const WeightedLeaf &L = Q[Pos];
1833 const SDValue &Val = L.Value;
1834 if (Val.getOpcode() != ISD::MUL ||
1835 !isa<ConstantSDNode>(Val.getOperand(1)) ||
1836 Val.getConstantOperandVal(1) > 127)
1837 continue;
1838 if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1839 (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1840 {
1841 Result = L;
1842 ResultPos = Pos;
1843 }
1844 }
1845
1846 if (Result.Value.getNode()) {
1847 Q.erase(&Q[ResultPos]);
1848 std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1849 }
1850
1851 return Result;
1852}
1853
1854SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1855 uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1856 return CurDAG->getConstant(MulFactor, SDLoc(N),
1857 N->getOperand(1).getValueType());
1858}
1859
1860/// @returns the value x for which 2^x is a factor of Val
1861static unsigned getPowerOf2Factor(SDValue Val) {
1862 if (Val.getOpcode() == ISD::MUL) {
1863 unsigned MaxFactor = 0;
1864 for (int i = 0; i < 2; ++i) {
1865 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
1866 if (!C)
1867 continue;
1868 const APInt &CInt = C->getAPIntValue();
1869 if (CInt.getBoolValue())
1870 MaxFactor = CInt.countr_zero();
1871 }
1872 return MaxFactor;
1873 }
1874 if (Val.getOpcode() == ISD::SHL) {
1875 if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1876 return 0;
1877 return (unsigned) Val.getConstantOperandVal(1);
1878 }
1879
1880 return 0;
1881}
1882
1883/// @returns true if V>>Amount will eliminate V's operation on its child
1884static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1885 if (V.getOpcode() == ISD::MUL) {
1886 SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1887 for (int i = 0; i < 2; ++i)
1888 if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1889 V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1890 uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1891 return (NewConst == 1);
1892 }
1893 } else if (V.getOpcode() == ISD::SHL) {
1894 return (Amount == V.getConstantOperandVal(1));
1895 }
1896
1897 return false;
1898}
1899
1900SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1901 SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1902 if (V.getOpcode() == ISD::MUL) {
1903 for (int i=0; i < 2; ++i) {
1904 if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1905 V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
1906 uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
1907 if (NewConst == 1)
1908 return Ops[!i];
1909 Ops[i] = CurDAG->getConstant(NewConst,
1910 SDLoc(V), V.getValueType());
1911 break;
1912 }
1913 }
1914 } else if (V.getOpcode() == ISD::SHL) {
1915 uint64_t ShiftAmount = V.getConstantOperandVal(1);
1916 if (ShiftAmount == Power)
1917 return Ops[0];
1918 Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
1919 SDLoc(V), V.getValueType());
1920 }
1921
1922 return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
1923}
1924
1925static bool isTargetConstant(const SDValue &V) {
1926 return V.getOpcode() == HexagonISD::CONST32 ||
1927 V.getOpcode() == HexagonISD::CONST32_GP;
1928}
1929
1930unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
1931 if (GAUsesInFunction.count(V))
1932 return GAUsesInFunction[V];
1933
1934 unsigned Result = 0;
1935 const Function &CurF = CurDAG->getMachineFunction().getFunction();
1936 for (const User *U : V->users()) {
1937 if (isa<Instruction>(U) &&
1938 cast<Instruction>(U)->getParent()->getParent() == &CurF)
1939 ++Result;
1940 }
1941
1942 GAUsesInFunction[V] = Result;
1943
1944 return Result;
1945}
1946
1947/// Note - After calling this, N may be dead. It may have been replaced by a
1948/// new node, so always use the returned value in place of N.
1949///
1950/// @returns The SDValue taking the place of N (which could be N if it is
1951/// unchanged)
1952SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
1953 assert(RootWeights.count(N) && "Cannot balance non-root node.");
1954 assert(RootWeights[N] != -2 && "This node was RAUW'd!");
1955 assert(!TopLevel || N->getOpcode() == ISD::ADD);
1956
1957 // Return early if this node was already visited
1958 if (RootWeights[N] != -1)
1959 return SDValue(N, 0);
1960
1962
1963 SDValue Op0 = N->getOperand(0);
1964 SDValue Op1 = N->getOperand(1);
1965
1966 // Return early if the operands will remain unchanged or are all roots
1967 if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
1968 (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
1969 SDNode *Op0N = Op0.getNode();
1970 int Weight;
1971 if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
1972 Weight = getWeight(balanceSubTree(Op0N).getNode());
1973 // Weight = calculateWeight(Op0N);
1974 } else
1975 Weight = getWeight(Op0N);
1976
1977 SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
1978 if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
1979 Weight += getWeight(balanceSubTree(Op1N).getNode());
1980 // Weight += calculateWeight(Op1N);
1981 } else
1982 Weight += getWeight(Op1N);
1983
1984 RootWeights[N] = Weight;
1985 RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
1986 getHeight(N->getOperand(1).getNode())) + 1;
1987
1988 LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
1989 << " Height=" << RootHeights[N] << "): ");
1990 LLVM_DEBUG(N->dump(CurDAG));
1991
1992 return SDValue(N, 0);
1993 }
1994
1995 LLVM_DEBUG(dbgs() << "** Balancing root node: ");
1996 LLVM_DEBUG(N->dump(CurDAG));
1997
1998 unsigned NOpcode = N->getOpcode();
1999
2000 LeafPrioQueue Leaves(NOpcode);
2001 SmallVector<SDValue, 4> Worklist;
2002 Worklist.push_back(SDValue(N, 0));
2003
2004 // SHL nodes will be converted to MUL nodes
2005 if (NOpcode == ISD::SHL)
2006 NOpcode = ISD::MUL;
2007
2008 bool CanFactorize = false;
2009 WeightedLeaf Mul1, Mul2;
2010 unsigned MaxPowerOf2 = 0;
2011 WeightedLeaf GA;
2012
2013 // Do not try to factor out a shift if there is already a shift at the tip of
2014 // the tree.
2015 bool HaveTopLevelShift = false;
2016 if (TopLevel &&
2017 ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
2018 Op0.getConstantOperandVal(1) < 4) ||
2019 (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
2020 Op1.getConstantOperandVal(1) < 4)))
2021 HaveTopLevelShift = true;
2022
2023 // Flatten the subtree into an ordered list of leaves; at the same time
2024 // determine whether the tree is already balanced.
2025 int InsertionOrder = 0;
2026 SmallDenseMap<SDValue, int> NodeHeights;
2027 bool Imbalanced = false;
2028 int CurrentWeight = 0;
2029 while (!Worklist.empty()) {
2030 SDValue Child = Worklist.pop_back_val();
2031
2032 if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
2033 // CASE 1: Child is a root note
2034
2035 int Weight = RootWeights[Child.getNode()];
2036 if (Weight == -1) {
2037 Child = balanceSubTree(Child.getNode());
2038 // calculateWeight(Child.getNode());
2039 Weight = getWeight(Child.getNode());
2040 } else if (Weight == -2) {
2041 // Whoops, this node was RAUWd by one of the balanceSubTree calls we
2042 // made. Our worklist isn't up to date anymore.
2043 // Restart the whole process.
2044 LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2045 return balanceSubTree(N, TopLevel);
2046 }
2047
2048 NodeHeights[Child] = 1;
2049 CurrentWeight += Weight;
2050
2051 unsigned PowerOf2;
2052 if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
2053 (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
2054 Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
2055 // Try to identify two factorizable MUL/SHL children greedily. Leave
2056 // them out of the priority queue for now so we can deal with them
2057 // after.
2058 if (!Mul1.Value.getNode()) {
2059 Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
2060 MaxPowerOf2 = PowerOf2;
2061 } else {
2062 Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
2063 MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
2064
2065 // Our addressing modes can only shift by a maximum of 3
2066 if (MaxPowerOf2 > 3)
2067 MaxPowerOf2 = 3;
2068
2069 CanFactorize = true;
2070 }
2071 } else
2072 Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2073 } else if (!isOpcodeHandled(Child.getNode())) {
2074 // CASE 2: Child is an unhandled kind of node (e.g. constant)
2075 int Weight = getWeight(Child.getNode());
2076
2077 NodeHeights[Child] = getHeight(Child.getNode());
2078 CurrentWeight += Weight;
2079
2080 if (isTargetConstant(Child) && !GA.Value.getNode())
2081 GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2082 else
2083 Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2084 } else {
2085 // CASE 3: Child is a subtree of same opcode
2086 // Visit children first, then flatten.
2087 unsigned ChildOpcode = Child.getOpcode();
2088 assert(ChildOpcode == NOpcode ||
2089 (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2090
2091 // Convert SHL to MUL
2092 SDValue Op1;
2093 if (ChildOpcode == ISD::SHL)
2094 Op1 = getMultiplierForSHL(Child.getNode());
2095 else
2096 Op1 = Child->getOperand(1);
2097
2098 if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2099 assert(!NodeHeights.count(Child) && "Parent visited before children?");
2100 // Visit children first, then re-visit this node
2101 Worklist.push_back(Child);
2102 Worklist.push_back(Op1);
2103 Worklist.push_back(Child->getOperand(0));
2104 } else {
2105 // Back at this node after visiting the children
2106 if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2107 Imbalanced = true;
2108
2109 NodeHeights[Child] = std::max(NodeHeights[Op1],
2110 NodeHeights[Child->getOperand(0)]) + 1;
2111 }
2112 }
2113 }
2114
2115 LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2116 << " weight=" << CurrentWeight
2117 << " imbalanced=" << Imbalanced << "\n");
2118
2119 // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2120 // This factors out a shift in order to match memw(a<<Y+b).
2121 if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2122 willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2123 LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2124 int Weight = Mul1.Weight + Mul2.Weight;
2125 int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2126 SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2127 SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2128 SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2129 Mul1Factored, Mul2Factored);
2130 SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2131 Mul1.Value.getValueType());
2132 SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2133 Sum, Const);
2134 NodeHeights[New] = Height;
2135 Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2136 } else if (Mul1.Value.getNode()) {
2137 // We failed to factorize two MULs, so now the Muls are left outside the
2138 // queue... add them back.
2139 Leaves.push(Mul1);
2140 if (Mul2.Value.getNode())
2141 Leaves.push(Mul2);
2142 CanFactorize = false;
2143 }
2144
2145 // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2146 // and the root node itself is not used more than twice. This reduces the
2147 // amount of additional constant extenders introduced by this optimization.
2148 bool CombinedGA = false;
2149 if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2150 GA.Value.hasOneUse() && N->use_size() < 3) {
2151 GlobalAddressSDNode *GANode =
2152 cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2153 ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2154
2155 if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2156 getTargetLowering()->isOffsetFoldingLegal(GANode)) {
2157 LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
2158 << Offset->getSExtValue() << "): ");
2159 LLVM_DEBUG(GANode->dump(CurDAG));
2160
2161 SDValue NewTGA =
2162 CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2163 GANode->getValueType(0),
2164 GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2165 GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2166 GA.Value.getValueType(), NewTGA);
2167 GA.Weight += Leaves.top().Weight;
2168
2169 NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2170 CombinedGA = true;
2171
2172 Leaves.pop(); // Remove the offset constant from the queue
2173 }
2174 }
2175
2176 if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2177 (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2178 RootWeights[N] = CurrentWeight;
2179 RootHeights[N] = NodeHeights[SDValue(N, 0)];
2180
2181 return SDValue(N, 0);
2182 }
2183
2184 // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2185 if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2186 WeightedLeaf SHL = Leaves.findSHL(31);
2187 if (SHL.Value.getNode()) {
2188 int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2189 GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2190 GA.Value.getValueType(),
2191 GA.Value, SHL.Value);
2192 GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2193 NodeHeights[GA.Value] = Height;
2194 }
2195 }
2196
2197 if (GA.Value.getNode())
2198 Leaves.push(GA);
2199
2200 // If this is the top level and we haven't factored out a shift, we should try
2201 // to move a constant to the bottom to match addressing modes like memw(rX+C)
2202 if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2203 LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2204 Leaves.pushToBottom(Leaves.pop());
2205 }
2206
2207 const DataLayout &DL = CurDAG->getDataLayout();
2209
2210 // Rebuild the tree using Huffman's algorithm
2211 while (Leaves.size() > 1) {
2212 WeightedLeaf L0 = Leaves.pop();
2213
2214 // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2215 // otherwise just get the next leaf
2216 WeightedLeaf L1 = Leaves.findMULbyConst();
2217 if (!L1.Value.getNode())
2218 L1 = Leaves.pop();
2219
2220 assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2221
2222 SDValue V0 = L0.Value;
2223 int V0Weight = L0.Weight;
2224 SDValue V1 = L1.Value;
2225 int V1Weight = L1.Weight;
2226
2227 // Make sure that none of these nodes have been RAUW'd
2228 if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2229 (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2230 LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2231 return balanceSubTree(N, TopLevel);
2232 }
2233
2234 ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
2235 ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
2236 EVT VT = N->getValueType(0);
2237 SDValue NewNode;
2238
2239 if (V0C && !V1C) {
2240 std::swap(V0, V1);
2241 std::swap(V0C, V1C);
2242 }
2243
2244 // Calculate height of this node
2245 assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2246 "Children must have been visited before re-combining them!");
2247 int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2248
2249 // Rebuild this node (and restore SHL from MUL if needed)
2250 if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2251 NewNode = CurDAG->getNode(
2252 ISD::SHL, SDLoc(V0), VT, V0,
2254 V1C->getAPIntValue().logBase2(), SDLoc(N),
2256 else
2257 NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2258
2259 NodeHeights[NewNode] = Height;
2260
2261 int Weight = V0Weight + V1Weight;
2262 Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2263
2264 LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
2265 << ",Height=" << Height << "):\n");
2266 LLVM_DEBUG(NewNode.dump());
2267 }
2268
2269 assert(Leaves.size() == 1);
2270 SDValue NewRoot = Leaves.top().Value;
2271
2272 assert(NodeHeights.count(NewRoot));
2273 int Height = NodeHeights[NewRoot];
2274
2275 // Restore SHL if we earlier converted it to a MUL
2276 if (NewRoot.getOpcode() == ISD::MUL) {
2277 ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2278 if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2279 EVT VT = NewRoot.getValueType();
2280 SDValue V0 = NewRoot.getOperand(0);
2281 NewRoot = CurDAG->getNode(
2282 ISD::SHL, SDLoc(NewRoot), VT, V0,
2284 V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2286 }
2287 }
2288
2289 if (N != NewRoot.getNode()) {
2290 LLVM_DEBUG(dbgs() << "--> Root is now: ");
2291 LLVM_DEBUG(NewRoot.dump());
2292
2293 // Replace all uses of old root by new root
2294 CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2295 // Mark that we have RAUW'd N
2296 RootWeights[N] = -2;
2297 } else {
2298 LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
2299 }
2300
2301 RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2302 RootHeights[NewRoot.getNode()] = Height;
2303
2304 return NewRoot;
2305}
2306
2307void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2309 SDNode *N = &Node;
2310 if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2311 continue;
2312
2313 SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2314 if (BasePtr.getOpcode() != ISD::ADD)
2315 continue;
2316
2317 // We've already processed this node
2318 if (RootWeights.count(BasePtr.getNode()))
2319 continue;
2320
2321 LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2322 LLVM_DEBUG(N->dump(CurDAG));
2323
2324 // FindRoots
2325 SmallVector<SDNode *, 4> Worklist;
2326
2327 Worklist.push_back(BasePtr.getOperand(0).getNode());
2328 Worklist.push_back(BasePtr.getOperand(1).getNode());
2329
2330 while (!Worklist.empty()) {
2331 SDNode *N = Worklist.pop_back_val();
2332 unsigned Opcode = N->getOpcode();
2333
2334 if (!isOpcodeHandled(N))
2335 continue;
2336
2337 Worklist.push_back(N->getOperand(0).getNode());
2338 Worklist.push_back(N->getOperand(1).getNode());
2339
2340 // Not a root if it has only one use and same opcode as its parent
2341 if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
2342 continue;
2343
2344 // This root node has already been processed
2345 if (RootWeights.count(N))
2346 continue;
2347
2348 RootWeights[N] = -1;
2349 }
2350
2351 // Balance node itself
2352 RootWeights[BasePtr.getNode()] = -1;
2353 SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2354
2355 if (N->getOpcode() == ISD::LOAD)
2356 N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2357 NewBasePtr, N->getOperand(2));
2358 else
2359 N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2360 NewBasePtr, N->getOperand(3));
2361
2362 LLVM_DEBUG(dbgs() << "--> Final node: ");
2363 LLVM_DEBUG(N->dump(CurDAG));
2364 }
2365
2367 GAUsesInFunction.clear();
2368 RootHeights.clear();
2369 RootWeights.clear();
2370}
unsigned SubReg
aarch64 promote const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
uint64_t Addr
uint64_t Size
static bool willShiftRightEliminate(SDValue V, unsigned Amount)
static cl::opt< bool > RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"))
static unsigned getPowerOf2Factor(SDValue Val)
static cl::opt< bool > CheckSingleUse("hexagon-isel-su", cl::Hidden, cl::init(true), cl::desc("Enable checking of SDNode's single-use status"))
static cl::opt< bool > EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), cl::desc("Rebalance address calculation trees to improve " "instruction selection"))
static bool isMemOPCandidate(SDNode *I, SDNode *U)
#define PASS_NAME
#define DEBUG_TYPE
static bool isTargetConstant(const SDValue &V)
static bool isOpcodeHandled(const SDNode *N)
static cl::opt< bool > RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if this allows optimizations"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
#define CH(x, y, z)
Definition: SHA256.cpp:34
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Class for arbitrary precision integers.
Definition: APInt.h:75
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1592
unsigned logBase2() const
Definition: APInt.h:1707
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:459
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:432
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
const GlobalValue * getGlobal() const
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool SelectNewCircIntrinsic(SDNode *IntN)
Generate a machine instruction node for the new circular buffer intrinsics.
bool tryLoadOfLoadIntrinsic(LoadSDNode *N)
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl)
MachineSDNode * LoadInstrForLoadIntrinsic(SDNode *IntN)
bool SelectAnyImm2(SDValue &N, SDValue &R)
bool SelectAnyImm(SDValue &N, SDValue &R)
bool SelectAnyImm0(SDValue &N, SDValue &R)
bool SelectAnyImm1(SDValue &N, SDValue &R)
bool DetectUseSxtw(SDValue &N, SDValue &R)
bool SelectBrevLdIntrinsic(SDNode *IntN)
bool SelectAddrFI(SDValue &N, SDValue &R)
SDNode * StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN)
bool SelectAddrGP(SDValue &N, SDValue &R)
bool SelectAnyImmediate(SDValue &N, SDValue &R, Align Alignment)
bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, Align Alignment)
bool SelectAnyImm3(SDValue &N, SDValue &R)
bool SelectAnyInt(SDValue &N, SDValue &R)
bool SelectAddrGA(SDValue &N, SDValue &R)
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl)
bool isValidAutoIncImm(const EVT VT, const int Offset) const
Hexagon target-specific information for each MachineFunction.
BitVector getReservedRegs(const MachineFunction &MF) const override
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
const HexagonFrameLowering * getFrameLowering() const override
const HexagonRegisterInfo * getRegisterInfo() const override
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
unsigned getVectorLength() const
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isValid() const
Definition: Register.h:126
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
const TargetLowering * TLI
MachineFunction * MF
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:721
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:726
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:539
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
This class is used to represent ISD::STORE nodes.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const
Return the type to use for a scalar shift opcode, given the shifted amount type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:119
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Level
Code generation optimization level.
Definition: CodeGen.h:57
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:978
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:779
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:773
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:726
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:704
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:599
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:572
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:776
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:794
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:679
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ ExternalSymbol
Definition: ISDOpcodes.h:83
@ BlockAddress
Definition: ISDOpcodes.h:84
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1396
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1427
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1777
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:271
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:179
unsigned M1(unsigned Val)
Definition: VE.h:468
FunctionPass * createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel)
createHexagonISelDag - This pass converts a legalized DAG into a Hexagon-specific DAG,...
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:245
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
unsigned M0(unsigned Val)
Definition: VE.h:467
@ Default
The result values are uniform if and only if all operands are uniform.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:299
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:160
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:311
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
This class contains a discriminated union of information about pointers in memory operands,...