LLVM 18.0.0git
HexagonISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the Hexagon target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "HexagonISelDAGToDAG.h"
14#include "Hexagon.h"
15#include "HexagonISelLowering.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsHexagon.h"
24#include "llvm/Support/Debug.h"
25using namespace llvm;
26
27#define DEBUG_TYPE "hexagon-isel"
28#define PASS_NAME "Hexagon DAG->DAG Pattern Instruction Selection"
29
30static
32EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
33 cl::desc("Rebalance address calculation trees to improve "
34 "instruction selection"));
35
36// Rebalance only if this allows e.g. combining a GA with an offset or
37// factoring out a shift.
38static
41 cl::desc("Rebalance address tree only if this allows optimizations"));
42
43static
46 cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
47
48static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
49 cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
50
51//===----------------------------------------------------------------------===//
52// Instruction Selector Implementation
53//===----------------------------------------------------------------------===//
54
55#define GET_DAGISEL_BODY HexagonDAGToDAGISel
56#include "HexagonGenDAGISel.inc"
57
58namespace llvm {
59/// createHexagonISelDag - This pass converts a legalized DAG into a
60/// Hexagon-specific DAG, ready for instruction scheduling.
62 CodeGenOptLevel OptLevel) {
63 return new HexagonDAGToDAGISel(TM, OptLevel);
64}
65}
66
68
70
71void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
72 SDValue Chain = LD->getChain();
73 SDValue Base = LD->getBasePtr();
74 SDValue Offset = LD->getOffset();
75 int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
76 EVT LoadedVT = LD->getMemoryVT();
77 unsigned Opcode = 0;
78
79 // Check for zero extended loads. Treat any-extend loads as zero extended
80 // loads.
81 ISD::LoadExtType ExtType = LD->getExtensionType();
82 bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
83 bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
84
85 assert(LoadedVT.isSimple());
86 switch (LoadedVT.getSimpleVT().SimpleTy) {
87 case MVT::i8:
88 if (IsZeroExt)
89 Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
90 else
91 Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
92 break;
93 case MVT::i16:
94 if (IsZeroExt)
95 Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
96 else
97 Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
98 break;
99 case MVT::i32:
100 case MVT::f32:
101 case MVT::v2i16:
102 case MVT::v4i8:
103 Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
104 break;
105 case MVT::i64:
106 case MVT::f64:
107 case MVT::v2i32:
108 case MVT::v4i16:
109 case MVT::v8i8:
110 Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
111 break;
112 case MVT::v64i8:
113 case MVT::v32i16:
114 case MVT::v16i32:
115 case MVT::v8i64:
116 case MVT::v128i8:
117 case MVT::v64i16:
118 case MVT::v32i32:
119 case MVT::v16i64:
120 if (isAlignedMemNode(LD)) {
121 if (LD->isNonTemporal())
122 Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
123 else
124 Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
125 } else {
126 Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
127 }
128 break;
129 default:
130 llvm_unreachable("Unexpected memory type in indexed load");
131 }
132
133 SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
134 MachineMemOperand *MemOp = LD->getMemOperand();
135
136 auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
137 -> MachineSDNode* {
138 if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
139 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
140 return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
141 Zero, SDValue(N, 0));
142 }
143 if (ExtType == ISD::SEXTLOAD)
144 return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
145 SDValue(N, 0));
146 return N;
147 };
148
149 // Loaded value Next address Chain
150 SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
151 SDValue To[3];
152
153 EVT ValueVT = LD->getValueType(0);
154 if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
155 // A load extending to i64 will actually produce i32, which will then
156 // need to be extended to i64.
157 assert(LoadedVT.getSizeInBits() <= 32);
158 ValueVT = MVT::i32;
159 }
160
161 if (IsValidInc) {
162 MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
163 MVT::i32, MVT::Other, Base,
164 IncV, Chain);
165 CurDAG->setNodeMemRefs(L, {MemOp});
166 To[1] = SDValue(L, 1); // Next address.
167 To[2] = SDValue(L, 2); // Chain.
168 // Handle special case for extension to i64.
169 if (LD->getValueType(0) == MVT::i64)
170 L = getExt64(L, dl);
171 To[0] = SDValue(L, 0); // Loaded (extended) value.
172 } else {
173 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
174 MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
175 Base, Zero, Chain);
176 CurDAG->setNodeMemRefs(L, {MemOp});
177 To[2] = SDValue(L, 1); // Chain.
178 MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
179 Base, IncV);
180 To[1] = SDValue(A, 0); // Next address.
181 // Handle special case for extension to i64.
182 if (LD->getValueType(0) == MVT::i64)
183 L = getExt64(L, dl);
184 To[0] = SDValue(L, 0); // Loaded (extended) value.
185 }
186 ReplaceUses(From, To, 3);
187 CurDAG->RemoveDeadNode(LD);
188}
189
191 if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
192 return nullptr;
193
194 SDLoc dl(IntN);
195 unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
196
197 static std::map<unsigned,unsigned> LoadPciMap = {
198 { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci },
199 { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
200 { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci },
201 { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
202 { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci },
203 { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci },
204 };
205 auto FLC = LoadPciMap.find(IntNo);
206 if (FLC != LoadPciMap.end()) {
207 EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
208 EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
209 // Operands: { Base, Increment, Modifier, Chain }
210 auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
211 SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
212 MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
213 { IntN->getOperand(2), I, IntN->getOperand(4),
214 IntN->getOperand(0) });
215 return Res;
216 }
217
218 return nullptr;
219}
220
222 SDNode *IntN) {
223 // The "LoadN" is just a machine load instruction. The intrinsic also
224 // involves storing it. Generate an appropriate store to the location
225 // given in the intrinsic's operand(3).
226 uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
227 unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
229 unsigned Size = 1U << (SizeBits-1);
230
231 SDLoc dl(IntN);
233 SDValue TS;
234 SDValue Loc = IntN->getOperand(3);
235
236 if (Size >= 4)
237 TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
238 Align(Size));
239 else
240 TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
241 PI, MVT::getIntegerVT(Size * 8), Align(Size));
242
243 SDNode *StoreN;
244 {
245 HandleSDNode Handle(TS);
246 SelectStore(TS.getNode());
247 StoreN = Handle.getValue().getNode();
248 }
249
250 // Load's results are { Loaded value, Updated pointer, Chain }
251 ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
252 ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
253 return StoreN;
254}
255
257 // The intrinsics for load circ/brev perform two operations:
258 // 1. Load a value V from the specified location, using the addressing
259 // mode corresponding to the intrinsic.
260 // 2. Store V into a specified location. This location is typically a
261 // local, temporary object.
262 // In many cases, the program using these intrinsics will immediately
263 // load V again from the local object. In those cases, when certain
264 // conditions are met, the last load can be removed.
265 // This function identifies and optimizes this pattern. If the pattern
266 // cannot be optimized, it returns nullptr, which will cause the load
267 // to be selected separately from the intrinsic (which will be handled
268 // in SelectIntrinsicWChain).
269
270 SDValue Ch = N->getOperand(0);
271 SDValue Loc = N->getOperand(1);
272
273 // Assume that the load and the intrinsic are connected directly with a
274 // chain:
275 // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C
276 // t2: i32,ch = load t1:1, Loc, ...
277 SDNode *C = Ch.getNode();
278
279 if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
280 return false;
281
282 // The second load can only be eliminated if its extension type matches
283 // that of the load instruction corresponding to the intrinsic. The user
284 // can provide an address of an unsigned variable to store the result of
285 // a sign-extending intrinsic into (or the other way around).
286 ISD::LoadExtType IntExt;
287 switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
288 case Intrinsic::hexagon_circ_ldub:
289 case Intrinsic::hexagon_circ_lduh:
290 IntExt = ISD::ZEXTLOAD;
291 break;
292 case Intrinsic::hexagon_circ_ldw:
293 case Intrinsic::hexagon_circ_ldd:
294 IntExt = ISD::NON_EXTLOAD;
295 break;
296 default:
297 IntExt = ISD::SEXTLOAD;
298 break;
299 }
300 if (N->getExtensionType() != IntExt)
301 return false;
302
303 // Make sure the target location for the loaded value in the load intrinsic
304 // is the location from which LD (or N) is loading.
305 if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
306 return false;
307
310 SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
311 SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
312 ReplaceUses(F, T, std::size(T));
313 // This transformation will leave the intrinsic dead. If it remains in
314 // the DAG, the selection code will see it again, but without the load,
315 // and it will generate a store that is normally required for it.
317 return true;
318 }
319 return false;
320}
321
322// Convert the bit-reverse load intrinsic to appropriate target instruction.
324 if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
325 return false;
326
327 const SDLoc &dl(IntN);
328 unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
329
330 static const std::map<unsigned, unsigned> LoadBrevMap = {
331 { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
332 { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
333 { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
334 { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
335 { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
336 { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
337 };
338 auto FLI = LoadBrevMap.find(IntNo);
339 if (FLI != LoadBrevMap.end()) {
340 EVT ValTy =
341 (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
342 EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
343 // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
344 // modifier}.
345 // Operands of target instruction: { Base, Modifier, Chain }.
347 FLI->second, dl, RTys,
348 {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
349
350 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
351 CurDAG->setNodeMemRefs(Res, {MemOp});
352
353 ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
354 ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
355 ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
356 CurDAG->RemoveDeadNode(IntN);
357 return true;
358 }
359 return false;
360}
361
362/// Generate a machine instruction node for the new circular buffer intrinsics.
363/// The new versions use a CSx register instead of the K field.
365 if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
366 return false;
367
368 SDLoc DL(IntN);
369 unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
371
372 static std::map<unsigned,unsigned> LoadNPcMap = {
373 { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
374 { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
375 { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
376 { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
377 { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
378 { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
379 { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
380 { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
381 { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
382 { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
383 { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
384 { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
385 };
386 auto FLI = LoadNPcMap.find (IntNo);
387 if (FLI != LoadNPcMap.end()) {
388 EVT ValTy = MVT::i32;
389 if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
390 IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
391 ValTy = MVT::i64;
392 EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
393 // Handle load.*_pci case which has 6 operands.
394 if (IntN->getNumOperands() == 6) {
395 auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
396 SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
397 // Operands: { Base, Increment, Modifier, Start, Chain }.
398 Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
399 IntN->getOperand(0) };
400 } else
401 // Handle load.*_pcr case which has 5 operands.
402 // Operands: { Base, Modifier, Start, Chain }.
403 Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
404 IntN->getOperand(0) };
405 MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
406 ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
407 ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
408 ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
409 CurDAG->RemoveDeadNode(IntN);
410 return true;
411 }
412
413 static std::map<unsigned,unsigned> StoreNPcMap = {
414 { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
415 { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
416 { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
417 { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
418 { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
419 { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
420 { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
421 { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
422 { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
423 { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
424 };
425 auto FSI = StoreNPcMap.find (IntNo);
426 if (FSI != StoreNPcMap.end()) {
427 EVT RTys[] = { MVT::i32, MVT::Other };
428 // Handle store.*_pci case which has 7 operands.
429 if (IntN->getNumOperands() == 7) {
430 auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
431 SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
432 // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
433 Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
434 IntN->getOperand(6), IntN->getOperand(0) };
435 } else
436 // Handle store.*_pcr case which has 6 operands.
437 // Operands: { Base, Modifier, Value, Start, Chain }.
438 Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
439 IntN->getOperand(5), IntN->getOperand(0) };
440 MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
441 ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
442 ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
443 CurDAG->RemoveDeadNode(IntN);
444 return true;
445 }
446
447 return false;
448}
449
451 SDLoc dl(N);
452 LoadSDNode *LD = cast<LoadSDNode>(N);
453
454 // Handle indexed loads.
455 ISD::MemIndexedMode AM = LD->getAddressingMode();
456 if (AM != ISD::UNINDEXED) {
457 SelectIndexedLoad(LD, dl);
458 return;
459 }
460
461 // Handle patterns using circ/brev load intrinsics.
463 return;
464
465 SelectCode(LD);
466}
467
469 SDValue Chain = ST->getChain();
470 SDValue Base = ST->getBasePtr();
471 SDValue Offset = ST->getOffset();
472 SDValue Value = ST->getValue();
473 // Get the constant value.
474 int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
475 EVT StoredVT = ST->getMemoryVT();
476 EVT ValueVT = Value.getValueType();
477
478 bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
479 unsigned Opcode = 0;
480
481 assert(StoredVT.isSimple());
482 switch (StoredVT.getSimpleVT().SimpleTy) {
483 case MVT::i8:
484 Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
485 break;
486 case MVT::i16:
487 Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
488 break;
489 case MVT::i32:
490 case MVT::f32:
491 case MVT::v2i16:
492 case MVT::v4i8:
493 Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
494 break;
495 case MVT::i64:
496 case MVT::f64:
497 case MVT::v2i32:
498 case MVT::v4i16:
499 case MVT::v8i8:
500 Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
501 break;
502 case MVT::v64i8:
503 case MVT::v32i16:
504 case MVT::v16i32:
505 case MVT::v8i64:
506 case MVT::v128i8:
507 case MVT::v64i16:
508 case MVT::v32i32:
509 case MVT::v16i64:
510 if (isAlignedMemNode(ST)) {
511 if (ST->isNonTemporal())
512 Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
513 else
514 Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
515 } else {
516 Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
517 }
518 break;
519 default:
520 llvm_unreachable("Unexpected memory type in indexed store");
521 }
522
523 if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
524 assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
525 Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
526 dl, MVT::i32, Value);
527 }
528
529 SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
530 MachineMemOperand *MemOp = ST->getMemOperand();
531
532 // Next address Chain
533 SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
534 SDValue To[2];
535
536 if (IsValidInc) {
537 // Build post increment store.
538 SDValue Ops[] = { Base, IncV, Value, Chain };
539 MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
540 Ops);
542 To[0] = SDValue(S, 0);
543 To[1] = SDValue(S, 1);
544 } else {
545 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
546 SDValue Ops[] = { Base, Zero, Value, Chain };
547 MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
549 To[1] = SDValue(S, 0);
550 MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
551 Base, IncV);
552 To[0] = SDValue(A, 0);
553 }
554
555 ReplaceUses(From, To, 2);
557}
558
560 SDLoc dl(N);
561 StoreSDNode *ST = cast<StoreSDNode>(N);
562
563 // Handle indexed stores.
564 ISD::MemIndexedMode AM = ST->getAddressingMode();
565 if (AM != ISD::UNINDEXED) {
566 SelectIndexedStore(ST, dl);
567 return;
568 }
569
570 SelectCode(ST);
571}
572
574 SDLoc dl(N);
575 SDValue Shl_0 = N->getOperand(0);
576 SDValue Shl_1 = N->getOperand(1);
577
578 auto Default = [this,N] () -> void { SelectCode(N); };
579
580 if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
581 return Default();
582
583 // RHS is const.
584 int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
585
586 if (Shl_0.getOpcode() == ISD::MUL) {
587 SDValue Mul_0 = Shl_0.getOperand(0); // Val
588 SDValue Mul_1 = Shl_0.getOperand(1); // Const
589 // RHS of mul is const.
590 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
591 int32_t ValConst = C->getSExtValue() << ShlConst;
592 if (isInt<9>(ValConst)) {
593 SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
594 SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
595 MVT::i32, Mul_0, Val);
596 ReplaceNode(N, Result);
597 return;
598 }
599 }
600 return Default();
601 }
602
603 if (Shl_0.getOpcode() == ISD::SUB) {
604 SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
605 SDValue Sub_1 = Shl_0.getOperand(1); // Val
606 if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
607 if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
608 return Default();
609 SDValue Shl2_0 = Sub_1.getOperand(0); // Val
610 SDValue Shl2_1 = Sub_1.getOperand(1); // Const
611 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
612 int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
613 if (isInt<9>(-ValConst)) {
614 SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
615 SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
616 MVT::i32, Shl2_0, Val);
617 ReplaceNode(N, Result);
618 return;
619 }
620 }
621 }
622 }
623
624 return Default();
625}
626
627//
628// Handling intrinsics for circular load and bitreverse load.
629//
634 return;
635 }
636
637 // Handle bit-reverse load intrinsics.
639 return;
640
642 return;
643
644 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
645 if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
646 IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
647 IntNo == Intrinsic::hexagon_V6_vgathermh ||
648 IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
649 IntNo == Intrinsic::hexagon_V6_vgathermhw ||
650 IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
652 return;
653 }
654 if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
655 IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
656 IntNo == Intrinsic::hexagon_V6_vgathermhq ||
657 IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
658 IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
659 IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
661 return;
662 }
663
664 SelectCode(N);
665}
666
668 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
669 unsigned Bits;
670 switch (IID) {
671 case Intrinsic::hexagon_S2_vsplatrb:
672 Bits = 8;
673 break;
674 case Intrinsic::hexagon_S2_vsplatrh:
675 Bits = 16;
676 break;
677 case Intrinsic::hexagon_V6_vaddcarry:
678 case Intrinsic::hexagon_V6_vaddcarry_128B:
679 case Intrinsic::hexagon_V6_vsubcarry:
680 case Intrinsic::hexagon_V6_vsubcarry_128B:
682 return;
683 default:
684 SelectCode(N);
685 return;
686 }
687
688 SDValue V = N->getOperand(1);
689 SDValue U;
690 // Splat intrinsics.
691 if (keepsLowBits(V, Bits, U)) {
692 SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
693 N->getOperand(0), U);
694 ReplaceNode(N, R.getNode());
695 SelectCode(R.getNode());
696 return;
697 }
698 SelectCode(N);
699}
700
702 SDValue Inp = N->getOperand(0);
703 MVT ResTy = N->getValueType(0).getSimpleVT();
704 auto IdxN = cast<ConstantSDNode>(N->getOperand(1));
705 unsigned Idx = IdxN->getZExtValue();
706
707 [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
708 [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
710 assert(2 * ResLen == InpTy.getVectorNumElements());
711 assert(ResTy.getSizeInBits() == 32);
712 assert(Idx == 0 || Idx == ResLen);
713
714 unsigned SubReg = Idx == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
715 SDValue Ext = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
716
717 ReplaceNode(N, Ext.getNode());
718}
719
720//
721// Map floating point constant values.
722//
724 SDLoc dl(N);
725 auto *CN = cast<ConstantFPSDNode>(N);
726 APInt A = CN->getValueAPF().bitcastToAPInt();
727 if (N->getValueType(0) == MVT::f32) {
728 SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
729 ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
730 return;
731 }
732 if (N->getValueType(0) == MVT::f64) {
733 SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
734 ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
735 return;
736 }
737
738 SelectCode(N);
739}
740
741//
742// Map boolean values.
743//
745 if (N->getValueType(0) == MVT::i1) {
746 assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1));
747 unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
748 ? Hexagon::PS_true
749 : Hexagon::PS_false;
750 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i1));
751 return;
752 }
753
754 SelectCode(N);
755}
756
759 const HexagonFrameLowering *HFI = HST->getFrameLowering();
760 int FX = cast<FrameIndexSDNode>(N)->getIndex();
761 Align StkA = HFI->getStackAlign();
762 Align MaxA = MFI.getMaxAlign();
763 SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
764 SDLoc DL(N);
765 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
766 SDNode *R = nullptr;
767
768 // Use PS_fi when:
769 // - the object is fixed, or
770 // - there are no objects with higher-than-default alignment, or
771 // - there are no dynamically allocated objects.
772 // Otherwise, use PS_fia.
773 if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
774 R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
775 } else {
776 auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
777 Register AR = HMFI.getStackAlignBaseReg();
779 SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
780 R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
781 }
782
783 ReplaceNode(N, R);
784}
785
787 unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
788 : Hexagon::A4_subp_c;
789 SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
790 { N->getOperand(0), N->getOperand(1),
791 N->getOperand(2) });
792 ReplaceNode(N, C);
793}
794
796 MVT ResTy = N->getValueType(0).getSimpleVT();
797 if (HST->isHVXVectorType(ResTy, true))
798 return SelectHvxVAlign(N);
799
800 const SDLoc &dl(N);
801 unsigned VecLen = ResTy.getSizeInBits();
802 if (VecLen == 32) {
803 SDValue Ops[] = {
804 CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
805 N->getOperand(0),
806 CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
807 N->getOperand(1),
808 CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
809 };
810 SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
811 MVT::i64, Ops);
812
813 // Shift right by "(Addr & 0x3) * 8" bytes.
814 SDNode *C;
815 SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
816 SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
817 if (HST->useCompound()) {
818 C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
819 M0, N->getOperand(2), M1);
820 } else {
821 SDNode *T = CurDAG->getMachineNode(Hexagon::S2_asl_i_r, dl, MVT::i32,
822 N->getOperand(2), M1);
823 C = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
824 SDValue(T, 0), M0);
825 }
826 SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
827 SDValue(R, 0), SDValue(C, 0));
828 SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
829 SDValue(S, 0));
830 ReplaceNode(N, E.getNode());
831 } else {
832 assert(VecLen == 64);
833 SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
834 N->getOperand(2));
835 SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
836 N->getOperand(0), N->getOperand(1),
837 SDValue(Pu,0));
838 ReplaceNode(N, VA);
839 }
840}
841
843 const SDLoc &dl(N);
844 SDValue A = N->getOperand(1);
845 int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
846 assert(isPowerOf2_32(-Mask));
847
848 SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
849 SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
850 N->getOperand(0), M);
851 ReplaceNode(N, AA);
852}
853
854// Handle these nodes here to avoid having to write patterns for all
855// combinations of input/output types. In all cases, the resulting
856// instruction is the same.
858 SDValue Op = N->getOperand(0);
859 MVT OpTy = Op.getValueType().getSimpleVT();
860 SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
861 CurDAG->getVTList(OpTy), {Op});
862 ReplaceNode(T, Op.getNode());
863}
864
866 MVT ResTy = N->getValueType(0).getSimpleVT();
867 SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
868 N->getOperand(0));
869 ReplaceNode(N, T);
870}
871
873 const SDLoc &dl(N);
874 MVT ResTy = N->getValueType(0).getSimpleVT();
875 SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
876 SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
877 N->getOperand(0), Zero);
878 ReplaceNode(N, T);
879}
880
882 const SDLoc &dl(N);
883 MVT ResTy = N->getValueType(0).getSimpleVT();
884 // The argument to V2Q should be a single vector.
885 MVT OpTy = N->getOperand(0).getValueType().getSimpleVT(); (void)OpTy;
886 assert(HST->getVectorLength() * 8 == OpTy.getSizeInBits());
887
888 SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
889 SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
890 SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
891 N->getOperand(0), SDValue(R,0));
892 ReplaceNode(N, T);
893}
894
896 const SDLoc &dl(N);
897 MVT ResTy = N->getValueType(0).getSimpleVT();
898 // The result of V2Q should be a single vector.
899 assert(HST->getVectorLength() * 8 == ResTy.getSizeInBits());
900
901 SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
902 SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
903 SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
904 N->getOperand(0), SDValue(R,0));
905 ReplaceNode(N, T);
906}
907
909 if (N->isMachineOpcode())
910 return N->setNodeId(-1); // Already selected.
911
912 auto isHvxOp = [this](SDNode *N) {
913 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
914 if (HST->isHVXVectorType(N->getValueType(i), true))
915 return true;
916 }
917 for (SDValue I : N->ops()) {
918 if (HST->isHVXVectorType(I.getValueType(), true))
919 return true;
920 }
921 return false;
922 };
923
924 if (HST->useHVXOps() && isHvxOp(N)) {
925 switch (N->getOpcode()) {
926 case ISD::EXTRACT_SUBVECTOR: return SelectHvxExtractSubvector(N);
927 case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N);
928
929 case HexagonISD::VROR: return SelectHvxRor(N);
930 }
931 }
932
933 switch (N->getOpcode()) {
934 case ISD::Constant: return SelectConstant(N);
935 case ISD::ConstantFP: return SelectConstantFP(N);
936 case ISD::FrameIndex: return SelectFrameIndex(N);
937 case ISD::SHL: return SelectSHL(N);
938 case ISD::LOAD: return SelectLoad(N);
939 case ISD::STORE: return SelectStore(N);
943
944 case HexagonISD::ADDC:
946 case HexagonISD::VALIGN: return SelectVAlign(N);
949 case HexagonISD::P2D: return SelectP2D(N);
950 case HexagonISD::D2P: return SelectD2P(N);
951 case HexagonISD::Q2V: return SelectQ2V(N);
952 case HexagonISD::V2Q: return SelectV2Q(N);
953 }
954
955 SelectCode(N);
956}
957
959 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
960 std::vector<SDValue> &OutOps) {
961 SDValue Inp = Op, Res;
962
963 switch (ConstraintID) {
964 default:
965 return true;
966 case InlineAsm::ConstraintCode::o: // Offsetable.
967 case InlineAsm::ConstraintCode::v: // Not offsetable.
968 case InlineAsm::ConstraintCode::m: // Memory.
969 if (SelectAddrFI(Inp, Res))
970 OutOps.push_back(Res);
971 else
972 OutOps.push_back(Inp);
973 break;
974 }
975
976 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
977 return false;
978}
979
980static bool isMemOPCandidate(SDNode *I, SDNode *U) {
981 // I is an operand of U. Check if U is an arithmetic (binary) operation
982 // usable in a memop, where the other operand is a loaded value, and the
983 // result of U is stored in the same location.
984
985 if (!U->hasOneUse())
986 return false;
987 unsigned Opc = U->getOpcode();
988 switch (Opc) {
989 case ISD::ADD:
990 case ISD::SUB:
991 case ISD::AND:
992 case ISD::OR:
993 break;
994 default:
995 return false;
996 }
997
998 SDValue S0 = U->getOperand(0);
999 SDValue S1 = U->getOperand(1);
1000 SDValue SY = (S0.getNode() == I) ? S1 : S0;
1001
1002 SDNode *UUse = *U->use_begin();
1003 if (UUse->getNumValues() != 1)
1004 return false;
1005
1006 // Check if one of the inputs to U is a load instruction and the output
1007 // is used by a store instruction. If so and they also have the same
1008 // base pointer, then don't preoprocess this node sequence as it
1009 // can be matched to a memop.
1010 SDNode *SYNode = SY.getNode();
1011 if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
1012 SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
1013 SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
1014 if (LDBasePtr == STBasePtr)
1015 return true;
1016 }
1017 return false;
1018}
1019
1020
1021// Transform: (or (select c x 0) z) -> (select c (or x z) z)
1022// (or (select c 0 y) z) -> (select c z (or y z))
1023void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
1024 SelectionDAG &DAG = *CurDAG;
1025
1026 for (auto *I : Nodes) {
1027 if (I->getOpcode() != ISD::OR)
1028 continue;
1029
1030 auto IsSelect0 = [](const SDValue &Op) -> bool {
1031 if (Op.getOpcode() != ISD::SELECT)
1032 return false;
1033 return isNullConstant(Op.getOperand(1)) ||
1034 isNullConstant(Op.getOperand(2));
1035 };
1036
1037 SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
1038 EVT VT = I->getValueType(0);
1039 bool SelN0 = IsSelect0(N0);
1040 SDValue SOp = SelN0 ? N0 : N1;
1041 SDValue VOp = SelN0 ? N1 : N0;
1042
1043 if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
1044 SDValue SC = SOp.getOperand(0);
1045 SDValue SX = SOp.getOperand(1);
1046 SDValue SY = SOp.getOperand(2);
1047 SDLoc DLS = SOp;
1048 if (isNullConstant(SY)) {
1049 SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1050 SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1051 DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1052 } else if (isNullConstant(SX)) {
1053 SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1054 SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1055 DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1056 }
1057 }
1058 }
1059}
1060
1061// Transform: (store ch val (add x (add (shl y c) e)))
1062// to: (store ch val (add x (shl (add y d) c))),
1063// where e = (shl d c) for some integer d.
1064// The purpose of this is to enable generation of loads/stores with
1065// shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1066// value c must be 0, 1 or 2.
1067void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1068 SelectionDAG &DAG = *CurDAG;
1069
1070 for (auto *I : Nodes) {
1071 if (I->getOpcode() != ISD::STORE)
1072 continue;
1073
1074 // I matched: (store ch val Off)
1075 SDValue Off = I->getOperand(2);
1076 // Off needs to match: (add x (add (shl y c) (shl d c))))
1077 if (Off.getOpcode() != ISD::ADD)
1078 continue;
1079 // Off matched: (add x T0)
1080 SDValue T0 = Off.getOperand(1);
1081 // T0 needs to match: (add T1 T2):
1082 if (T0.getOpcode() != ISD::ADD)
1083 continue;
1084 // T0 matched: (add T1 T2)
1085 SDValue T1 = T0.getOperand(0);
1086 SDValue T2 = T0.getOperand(1);
1087 // T1 needs to match: (shl y c)
1088 if (T1.getOpcode() != ISD::SHL)
1089 continue;
1090 SDValue C = T1.getOperand(1);
1091 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(C.getNode());
1092 if (CN == nullptr)
1093 continue;
1094 unsigned CV = CN->getZExtValue();
1095 if (CV > 2)
1096 continue;
1097 // T2 needs to match e, where e = (shl d c) for some d.
1098 ConstantSDNode *EN = dyn_cast<ConstantSDNode>(T2.getNode());
1099 if (EN == nullptr)
1100 continue;
1101 unsigned EV = EN->getZExtValue();
1102 if (EV % (1 << CV) != 0)
1103 continue;
1104 unsigned DV = EV / (1 << CV);
1105
1106 // Replace T0 with: (shl (add y d) c)
1107 SDLoc DL = SDLoc(I);
1108 EVT VT = T0.getValueType();
1109 SDValue D = DAG.getConstant(DV, DL, VT);
1110 // NewAdd = (add y d)
1111 SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1112 // NewShl = (shl NewAdd c)
1113 SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1114 ReplaceNode(T0.getNode(), NewShl.getNode());
1115 }
1116}
1117
1118// Transform: (load ch (add x (and (srl y c) Mask)))
1119// to: (load ch (add x (shl (srl y d) d-c)))
1120// where
1121// Mask = 00..0 111..1 0.0
1122// | | +-- d-c 0s, and d-c is 0, 1 or 2.
1123// | +-------- 1s
1124// +-------------- at most c 0s
1125// Motivating example:
1126// DAG combiner optimizes (add x (shl (srl y 5) 2))
1127// to (add x (and (srl y 3) 1FFFFFFC))
1128// which results in a constant-extended and(##...,lsr). This transformation
1129// undoes this simplification for cases where the shl can be folded into
1130// an addressing mode.
1131void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1132 SelectionDAG &DAG = *CurDAG;
1133
1134 for (SDNode *N : Nodes) {
1135 unsigned Opc = N->getOpcode();
1136 if (Opc != ISD::LOAD && Opc != ISD::STORE)
1137 continue;
1138 SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1139 // Addr must match: (add x T0)
1140 if (Addr.getOpcode() != ISD::ADD)
1141 continue;
1142 SDValue T0 = Addr.getOperand(1);
1143 // T0 must match: (and T1 Mask)
1144 if (T0.getOpcode() != ISD::AND)
1145 continue;
1146
1147 // We have an AND.
1148 //
1149 // Check the first operand. It must be: (srl y c).
1150 SDValue S = T0.getOperand(0);
1151 if (S.getOpcode() != ISD::SRL)
1152 continue;
1153 ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode());
1154 if (SN == nullptr)
1155 continue;
1156 if (SN->getAPIntValue().getBitWidth() != 32)
1157 continue;
1158 uint32_t CV = SN->getZExtValue();
1159
1160 // Check the second operand: the supposed mask.
1161 ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode());
1162 if (MN == nullptr)
1163 continue;
1164 if (MN->getAPIntValue().getBitWidth() != 32)
1165 continue;
1166 uint32_t Mask = MN->getZExtValue();
1167 // Examine the mask.
1168 uint32_t TZ = llvm::countr_zero(Mask);
1169 uint32_t M1 = llvm::countr_one(Mask >> TZ);
1170 uint32_t LZ = llvm::countl_zero(Mask);
1171 // Trailing zeros + middle ones + leading zeros must equal the width.
1172 if (TZ + M1 + LZ != 32)
1173 continue;
1174 // The number of trailing zeros will be encoded in the addressing mode.
1175 if (TZ > 2)
1176 continue;
1177 // The number of leading zeros must be at most c.
1178 if (LZ > CV)
1179 continue;
1180
1181 // All looks good.
1182 SDValue Y = S.getOperand(0);
1183 EVT VT = Addr.getValueType();
1184 SDLoc dl(S);
1185 // TZ = D-C, so D = TZ+C.
1186 SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1187 SDValue DC = DAG.getConstant(TZ, dl, VT);
1188 SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1189 SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1190 ReplaceNode(T0.getNode(), NewShl.getNode());
1191 }
1192}
1193
1194// Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1195// (op ... 1 ...))
1196void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1197 SelectionDAG &DAG = *CurDAG;
1198
1199 for (SDNode *N : Nodes) {
1200 unsigned Opc = N->getOpcode();
1201 if (Opc != ISD::ZERO_EXTEND)
1202 continue;
1203 SDValue OpI1 = N->getOperand(0);
1204 EVT OpVT = OpI1.getValueType();
1205 if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1206 continue;
1207 for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
1208 SDNode *U = *I;
1209 if (U->getNumValues() != 1)
1210 continue;
1211 EVT UVT = U->getValueType(0);
1212 if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1213 continue;
1214 // Do not generate select for all i1 vector type.
1215 if (UVT.isVector() && UVT.getVectorElementType() == MVT::i1)
1216 continue;
1217 if (isMemOPCandidate(N, U))
1218 continue;
1219
1220 // Potentially simplifiable operation.
1221 unsigned I1N = I.getOperandNo();
1222 SmallVector<SDValue,2> Ops(U->getNumOperands());
1223 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1224 Ops[i] = U->getOperand(i);
1225 EVT BVT = Ops[I1N].getValueType();
1226
1227 const SDLoc &dl(U);
1228 SDValue C0 = DAG.getConstant(0, dl, BVT);
1229 SDValue C1 = DAG.getConstant(1, dl, BVT);
1230 SDValue If0, If1;
1231
1232 if (isa<MachineSDNode>(U)) {
1233 unsigned UseOpc = U->getMachineOpcode();
1234 Ops[I1N] = C0;
1235 If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1236 Ops[I1N] = C1;
1237 If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1238 } else {
1239 unsigned UseOpc = U->getOpcode();
1240 Ops[I1N] = C0;
1241 If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1242 Ops[I1N] = C1;
1243 If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1244 }
1245 // We're generating a SELECT way after legalization, so keep the types
1246 // simple.
1247 unsigned UW = UVT.getSizeInBits();
1248 EVT SVT = (UW == 32 || UW == 64) ? MVT::getIntegerVT(UW) : UVT;
1249 SDValue Sel = DAG.getNode(ISD::SELECT, dl, SVT, OpI1,
1250 DAG.getBitcast(SVT, If1),
1251 DAG.getBitcast(SVT, If0));
1252 SDValue Ret = DAG.getBitcast(UVT, Sel);
1253 DAG.ReplaceAllUsesWith(U, Ret.getNode());
1254 }
1255 }
1256}
1257
1259 // Repack all nodes before calling each preprocessing function,
1260 // because each of them can modify the set of nodes.
1261 auto getNodes = [this]() -> std::vector<SDNode *> {
1262 std::vector<SDNode *> T;
1263 T.reserve(CurDAG->allnodes_size());
1264 for (SDNode &N : CurDAG->allnodes())
1265 T.push_back(&N);
1266 return T;
1267 };
1268
1269 if (HST->useHVXOps())
1270 PreprocessHvxISelDAG();
1271
1272 // Transform: (or (select c x 0) z) -> (select c (or x z) z)
1273 // (or (select c 0 y) z) -> (select c z (or y z))
1274 ppSimplifyOrSelect0(getNodes());
1275
1276 // Transform: (store ch val (add x (add (shl y c) e)))
1277 // to: (store ch val (add x (shl (add y d) c))),
1278 // where e = (shl d c) for some integer d.
1279 // The purpose of this is to enable generation of loads/stores with
1280 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1281 // value c must be 0, 1 or 2.
1282 ppAddrReorderAddShl(getNodes());
1283
1284 // Transform: (load ch (add x (and (srl y c) Mask)))
1285 // to: (load ch (add x (shl (srl y d) d-c)))
1286 // where
1287 // Mask = 00..0 111..1 0.0
1288 // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1289 // | +-------- 1s
1290 // +-------------- at most c 0s
1291 // Motivating example:
1292 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1293 // to (add x (and (srl y 3) 1FFFFFFC))
1294 // which results in a constant-extended and(##...,lsr). This transformation
1295 // undoes this simplification for cases where the shl can be folded into
1296 // an addressing mode.
1297 ppAddrRewriteAndSrl(getNodes());
1298
1299 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1300 // (op ... 1 ...))
1301 ppHoistZextI1(getNodes());
1302
1303 DEBUG_WITH_TYPE("isel", {
1304 dbgs() << "Preprocessed (Hexagon) selection DAG:";
1305 CurDAG->dump();
1306 });
1307
1309 rebalanceAddressTrees();
1310
1311 DEBUG_WITH_TYPE("isel", {
1312 dbgs() << "Address tree balanced selection DAG:";
1313 CurDAG->dump();
1314 });
1315 }
1316}
1317
1319 auto &HST = MF->getSubtarget<HexagonSubtarget>();
1320 auto &HFI = *HST.getFrameLowering();
1321 if (!HFI.needsAligna(*MF))
1322 return;
1323
1325 MachineBasicBlock *EntryBB = &MF->front();
1326 Align EntryMaxA = MFI.getMaxAlign();
1327
1328 // Reserve the first non-volatile register.
1329 Register AP = 0;
1330 auto &HRI = *HST.getRegisterInfo();
1332 for (const MCPhysReg *R = HRI.getCalleeSavedRegs(MF); *R; ++R) {
1333 if (Reserved[*R])
1334 continue;
1335 AP = *R;
1336 break;
1337 }
1338 assert(AP.isValid() && "Couldn't reserve stack align register");
1339 BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AP)
1340 .addImm(EntryMaxA.value());
1341 MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseReg(AP);
1342}
1343
1344void HexagonDAGToDAGISel::updateAligna() {
1345 auto &HFI = *MF->getSubtarget<HexagonSubtarget>().getFrameLowering();
1346 if (!HFI.needsAligna(*MF))
1347 return;
1348 auto *AlignaI = const_cast<MachineInstr*>(HFI.getAlignaInstr(*MF));
1349 assert(AlignaI != nullptr);
1350 unsigned MaxA = MF->getFrameInfo().getMaxAlign().value();
1351 if (AlignaI->getOperand(1).getImm() < MaxA)
1352 AlignaI->getOperand(1).setImm(MaxA);
1353}
1354
1355// Match a frame index that can be used in an addressing mode.
1357 if (N.getOpcode() != ISD::FrameIndex)
1358 return false;
1359 auto &HFI = *HST->getFrameLowering();
1361 int FX = cast<FrameIndexSDNode>(N)->getIndex();
1362 if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1363 return false;
1364 R = CurDAG->getTargetFrameIndex(FX, MVT::i32);
1365 return true;
1366}
1367
1369 return SelectGlobalAddress(N, R, false, Align(1));
1370}
1371
1373 return SelectGlobalAddress(N, R, true, Align(1));
1374}
1375
1377 return SelectAnyImmediate(N, R, Align(1));
1378}
1379
1381 return SelectAnyImmediate(N, R, Align(1));
1382}
1384 return SelectAnyImmediate(N, R, Align(2));
1385}
1387 return SelectAnyImmediate(N, R, Align(4));
1388}
1390 return SelectAnyImmediate(N, R, Align(8));
1391}
1392
1394 EVT T = N.getValueType();
1395 if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1396 return false;
1397 int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1398 R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1399 return true;
1400}
1401
1403 Align Alignment) {
1404 switch (N.getOpcode()) {
1405 case ISD::Constant: {
1406 if (N.getValueType() != MVT::i32)
1407 return false;
1408 int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1409 if (!isAligned(Alignment, V))
1410 return false;
1411 R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1412 return true;
1413 }
1414 case HexagonISD::JT:
1415 case HexagonISD::CP:
1416 // These are assumed to always be aligned at least 8-byte boundary.
1417 if (Alignment > Align(8))
1418 return false;
1419 R = N.getOperand(0);
1420 return true;
1422 // Symbols may be aligned at any boundary.
1423 if (Alignment > Align(1))
1424 return false;
1425 R = N;
1426 return true;
1427 case ISD::BlockAddress:
1428 // Block address is always aligned at least 4-byte boundary.
1429 if (Alignment > Align(4) ||
1430 !isAligned(Alignment, cast<BlockAddressSDNode>(N)->getOffset()))
1431 return false;
1432 R = N;
1433 return true;
1434 }
1435
1436 if (SelectGlobalAddress(N, R, false, Alignment) ||
1437 SelectGlobalAddress(N, R, true, Alignment))
1438 return true;
1439
1440 return false;
1441}
1442
1444 bool UseGP, Align Alignment) {
1445 switch (N.getOpcode()) {
1446 case ISD::ADD: {
1447 SDValue N0 = N.getOperand(0);
1448 SDValue N1 = N.getOperand(1);
1449 unsigned GAOpc = N0.getOpcode();
1450 if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1451 return false;
1452 if (!UseGP && GAOpc != HexagonISD::CONST32)
1453 return false;
1454 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1455 if (!isAligned(Alignment, Const->getZExtValue()))
1456 return false;
1457 SDValue Addr = N0.getOperand(0);
1458 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1459 if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1460 uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1461 R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1462 N.getValueType(), NewOff);
1463 return true;
1464 }
1465 }
1466 }
1467 break;
1468 }
1469 case HexagonISD::CP:
1470 case HexagonISD::JT:
1472 // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1473 // want in the instruction.
1474 if (!UseGP)
1475 R = N.getOperand(0);
1476 return !UseGP;
1478 if (UseGP)
1479 R = N.getOperand(0);
1480 return UseGP;
1481 default:
1482 return false;
1483 }
1484
1485 return false;
1486}
1487
1489 // This (complex pattern) function is meant to detect a sign-extension
1490 // i32->i64 on a per-operand basis. This would allow writing single
1491 // patterns that would cover a number of combinations of different ways
1492 // a sign-extensions could be written. For example:
1493 // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1494 // could match either one of these:
1495 // (mul (sext x) (sext_inreg y))
1496 // (mul (sext-load *p) (sext_inreg y))
1497 // (mul (sext_inreg x) (sext y))
1498 // etc.
1499 //
1500 // The returned value will have type i64 and its low word will
1501 // contain the value being extended. The high bits are not specified.
1502 // The returned type is i64 because the original type of N was i64,
1503 // but the users of this function should only use the low-word of the
1504 // result, e.g.
1505 // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1506
1507 if (N.getValueType() != MVT::i64)
1508 return false;
1509 unsigned Opc = N.getOpcode();
1510 switch (Opc) {
1511 case ISD::SIGN_EXTEND:
1513 // sext_inreg has the source type as a separate operand.
1514 EVT T = Opc == ISD::SIGN_EXTEND
1515 ? N.getOperand(0).getValueType()
1516 : cast<VTSDNode>(N.getOperand(1))->getVT();
1517 unsigned SW = T.getSizeInBits();
1518 if (SW == 32)
1519 R = N.getOperand(0);
1520 else if (SW < 32)
1521 R = N;
1522 else
1523 return false;
1524 break;
1525 }
1526 case ISD::LOAD: {
1527 LoadSDNode *L = cast<LoadSDNode>(N);
1528 if (L->getExtensionType() != ISD::SEXTLOAD)
1529 return false;
1530 // All extending loads extend to i32, so even if the value in
1531 // memory is shorter than 32 bits, it will be i32 after the load.
1532 if (L->getMemoryVT().getSizeInBits() > 32)
1533 return false;
1534 R = N;
1535 break;
1536 }
1537 case ISD::SRA: {
1538 auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1539 if (!S || S->getZExtValue() != 32)
1540 return false;
1541 R = N;
1542 break;
1543 }
1544 default:
1545 return false;
1546 }
1547 EVT RT = R.getValueType();
1548 if (RT == MVT::i64)
1549 return true;
1550 assert(RT == MVT::i32);
1551 // This is only to produce a value of type i64. Do not rely on the
1552 // high bits produced by this.
1553 const SDLoc &dl(N);
1554 SDValue Ops[] = {
1555 CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1556 R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1557 R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1558 };
1559 SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1560 MVT::i64, Ops);
1561 R = SDValue(T, 0);
1562 return true;
1563}
1564
1565bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1566 SDValue &Src) {
1567 unsigned Opc = Val.getOpcode();
1568 switch (Opc) {
1569 case ISD::SIGN_EXTEND:
1570 case ISD::ZERO_EXTEND:
1571 case ISD::ANY_EXTEND: {
1572 const SDValue &Op0 = Val.getOperand(0);
1573 EVT T = Op0.getValueType();
1574 if (T.isInteger() && T.getSizeInBits() == NumBits) {
1575 Src = Op0;
1576 return true;
1577 }
1578 break;
1579 }
1581 case ISD::AssertSext:
1582 case ISD::AssertZext:
1583 if (Val.getOperand(0).getValueType().isInteger()) {
1584 VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1585 if (T->getVT().getSizeInBits() == NumBits) {
1586 Src = Val.getOperand(0);
1587 return true;
1588 }
1589 }
1590 break;
1591 case ISD::AND: {
1592 // Check if this is an AND with NumBits of lower bits set to 1.
1593 uint64_t Mask = (1ULL << NumBits) - 1;
1594 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1595 if (C->getZExtValue() == Mask) {
1596 Src = Val.getOperand(1);
1597 return true;
1598 }
1599 }
1600 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1601 if (C->getZExtValue() == Mask) {
1602 Src = Val.getOperand(0);
1603 return true;
1604 }
1605 }
1606 break;
1607 }
1608 case ISD::OR:
1609 case ISD::XOR: {
1610 // OR/XOR with the lower NumBits bits set to 0.
1611 uint64_t Mask = (1ULL << NumBits) - 1;
1612 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1613 if ((C->getZExtValue() & Mask) == 0) {
1614 Src = Val.getOperand(1);
1615 return true;
1616 }
1617 }
1618 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1619 if ((C->getZExtValue() & Mask) == 0) {
1620 Src = Val.getOperand(0);
1621 return true;
1622 }
1623 }
1624 break;
1625 }
1626 default:
1627 break;
1628 }
1629 return false;
1630}
1631
1632bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1633 return N->getAlign().value() >= N->getMemoryVT().getStoreSize();
1634}
1635
1636bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1637 unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1638 switch (N->getMemoryVT().getStoreSize()) {
1639 case 1:
1640 return StackSize <= 56; // 1*2^6 - 8
1641 case 2:
1642 return StackSize <= 120; // 2*2^6 - 8
1643 case 4:
1644 return StackSize <= 248; // 4*2^6 - 8
1645 default:
1646 return false;
1647 }
1648}
1649
1650// Return true when the given node fits in a positive half word.
1651bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1652 if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1653 int64_t V = CN->getSExtValue();
1654 return V > 0 && isInt<16>(V);
1655 }
1656 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1657 const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1658 return VN->getVT().getSizeInBits() <= 16;
1659 }
1660 return false;
1661}
1662
1663bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1664 return !CheckSingleUse || N->hasOneUse();
1665}
1666
1667////////////////////////////////////////////////////////////////////////////////
1668// Rebalancing of address calculation trees
1669
1670static bool isOpcodeHandled(const SDNode *N) {
1671 switch (N->getOpcode()) {
1672 case ISD::ADD:
1673 case ISD::MUL:
1674 return true;
1675 case ISD::SHL:
1676 // We only handle constant shifts because these can be easily flattened
1677 // into multiplications by 2^Op1.
1678 return isa<ConstantSDNode>(N->getOperand(1).getNode());
1679 default:
1680 return false;
1681 }
1682}
1683
1684/// Return the weight of an SDNode
1685int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1686 if (!isOpcodeHandled(N))
1687 return 1;
1688 assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1689 assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1690 assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1691 return RootWeights[N];
1692}
1693
1694int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1695 if (!isOpcodeHandled(N))
1696 return 0;
1697 assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1698 "Cannot query height of unvisited/RAUW'd node!");
1699 return RootHeights[N];
1700}
1701
1702namespace {
1703struct WeightedLeaf {
1704 SDValue Value;
1705 int Weight;
1706 int InsertionOrder;
1707
1708 WeightedLeaf() {}
1709
1710 WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1711 Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1712 assert(Weight >= 0 && "Weight must be >= 0");
1713 }
1714
1715 static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1716 assert(A.Value.getNode() && B.Value.getNode());
1717 return A.Weight == B.Weight ?
1718 (A.InsertionOrder > B.InsertionOrder) :
1719 (A.Weight > B.Weight);
1720 }
1721};
1722
1723/// A specialized priority queue for WeigthedLeaves. It automatically folds
1724/// constants and allows removal of non-top elements while maintaining the
1725/// priority order.
1726class LeafPrioQueue {
1728 bool HaveConst;
1729 WeightedLeaf ConstElt;
1730 unsigned Opcode;
1731
1732public:
1733 bool empty() {
1734 return (!HaveConst && Q.empty());
1735 }
1736
1737 size_t size() {
1738 return Q.size() + HaveConst;
1739 }
1740
1741 bool hasConst() {
1742 return HaveConst;
1743 }
1744
1745 const WeightedLeaf &top() {
1746 if (HaveConst)
1747 return ConstElt;
1748 return Q.front();
1749 }
1750
1751 WeightedLeaf pop() {
1752 if (HaveConst) {
1753 HaveConst = false;
1754 return ConstElt;
1755 }
1756 std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1757 return Q.pop_back_val();
1758 }
1759
1760 void push(WeightedLeaf L, bool SeparateConst=true) {
1761 if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1762 if (Opcode == ISD::MUL &&
1763 cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1764 return;
1765 if (Opcode == ISD::ADD &&
1766 cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1767 return;
1768
1769 HaveConst = true;
1770 ConstElt = L;
1771 } else {
1772 Q.push_back(L);
1773 std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1774 }
1775 }
1776
1777 /// Push L to the bottom of the queue regardless of its weight. If L is
1778 /// constant, it will not be folded with other constants in the queue.
1779 void pushToBottom(WeightedLeaf L) {
1780 L.Weight = 1000;
1781 push(L, false);
1782 }
1783
1784 /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1785 /// lowest weight and remove it from the queue.
1786 WeightedLeaf findSHL(uint64_t MaxAmount);
1787
1788 WeightedLeaf findMULbyConst();
1789
1790 LeafPrioQueue(unsigned Opcode) :
1791 HaveConst(false), Opcode(Opcode) { }
1792};
1793} // end anonymous namespace
1794
1795WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1796 int ResultPos;
1797 WeightedLeaf Result;
1798
1799 for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1800 const WeightedLeaf &L = Q[Pos];
1801 const SDValue &Val = L.Value;
1802 if (Val.getOpcode() != ISD::SHL ||
1803 !isa<ConstantSDNode>(Val.getOperand(1)) ||
1804 Val.getConstantOperandVal(1) > MaxAmount)
1805 continue;
1806 if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1807 (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1808 {
1809 Result = L;
1810 ResultPos = Pos;
1811 }
1812 }
1813
1814 if (Result.Value.getNode()) {
1815 Q.erase(&Q[ResultPos]);
1816 std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1817 }
1818
1819 return Result;
1820}
1821
1822WeightedLeaf LeafPrioQueue::findMULbyConst() {
1823 int ResultPos;
1824 WeightedLeaf Result;
1825
1826 for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1827 const WeightedLeaf &L = Q[Pos];
1828 const SDValue &Val = L.Value;
1829 if (Val.getOpcode() != ISD::MUL ||
1830 !isa<ConstantSDNode>(Val.getOperand(1)) ||
1831 Val.getConstantOperandVal(1) > 127)
1832 continue;
1833 if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1834 (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1835 {
1836 Result = L;
1837 ResultPos = Pos;
1838 }
1839 }
1840
1841 if (Result.Value.getNode()) {
1842 Q.erase(&Q[ResultPos]);
1843 std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1844 }
1845
1846 return Result;
1847}
1848
1849SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1850 uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1851 return CurDAG->getConstant(MulFactor, SDLoc(N),
1852 N->getOperand(1).getValueType());
1853}
1854
1855/// @returns the value x for which 2^x is a factor of Val
1856static unsigned getPowerOf2Factor(SDValue Val) {
1857 if (Val.getOpcode() == ISD::MUL) {
1858 unsigned MaxFactor = 0;
1859 for (int i = 0; i < 2; ++i) {
1860 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
1861 if (!C)
1862 continue;
1863 const APInt &CInt = C->getAPIntValue();
1864 if (CInt.getBoolValue())
1865 MaxFactor = CInt.countr_zero();
1866 }
1867 return MaxFactor;
1868 }
1869 if (Val.getOpcode() == ISD::SHL) {
1870 if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1871 return 0;
1872 return (unsigned) Val.getConstantOperandVal(1);
1873 }
1874
1875 return 0;
1876}
1877
1878/// @returns true if V>>Amount will eliminate V's operation on its child
1879static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1880 if (V.getOpcode() == ISD::MUL) {
1881 SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1882 for (int i = 0; i < 2; ++i)
1883 if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1884 V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1885 uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1886 return (NewConst == 1);
1887 }
1888 } else if (V.getOpcode() == ISD::SHL) {
1889 return (Amount == V.getConstantOperandVal(1));
1890 }
1891
1892 return false;
1893}
1894
1895SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1896 SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1897 if (V.getOpcode() == ISD::MUL) {
1898 for (int i=0; i < 2; ++i) {
1899 if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1900 V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
1901 uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
1902 if (NewConst == 1)
1903 return Ops[!i];
1904 Ops[i] = CurDAG->getConstant(NewConst,
1905 SDLoc(V), V.getValueType());
1906 break;
1907 }
1908 }
1909 } else if (V.getOpcode() == ISD::SHL) {
1910 uint64_t ShiftAmount = V.getConstantOperandVal(1);
1911 if (ShiftAmount == Power)
1912 return Ops[0];
1913 Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
1914 SDLoc(V), V.getValueType());
1915 }
1916
1917 return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
1918}
1919
1920static bool isTargetConstant(const SDValue &V) {
1921 return V.getOpcode() == HexagonISD::CONST32 ||
1922 V.getOpcode() == HexagonISD::CONST32_GP;
1923}
1924
1925unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
1926 if (GAUsesInFunction.count(V))
1927 return GAUsesInFunction[V];
1928
1929 unsigned Result = 0;
1930 const Function &CurF = CurDAG->getMachineFunction().getFunction();
1931 for (const User *U : V->users()) {
1932 if (isa<Instruction>(U) &&
1933 cast<Instruction>(U)->getParent()->getParent() == &CurF)
1934 ++Result;
1935 }
1936
1937 GAUsesInFunction[V] = Result;
1938
1939 return Result;
1940}
1941
1942/// Note - After calling this, N may be dead. It may have been replaced by a
1943/// new node, so always use the returned value in place of N.
1944///
1945/// @returns The SDValue taking the place of N (which could be N if it is
1946/// unchanged)
1947SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
1948 assert(RootWeights.count(N) && "Cannot balance non-root node.");
1949 assert(RootWeights[N] != -2 && "This node was RAUW'd!");
1950 assert(!TopLevel || N->getOpcode() == ISD::ADD);
1951
1952 // Return early if this node was already visited
1953 if (RootWeights[N] != -1)
1954 return SDValue(N, 0);
1955
1957
1958 SDValue Op0 = N->getOperand(0);
1959 SDValue Op1 = N->getOperand(1);
1960
1961 // Return early if the operands will remain unchanged or are all roots
1962 if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
1963 (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
1964 SDNode *Op0N = Op0.getNode();
1965 int Weight;
1966 if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
1967 Weight = getWeight(balanceSubTree(Op0N).getNode());
1968 // Weight = calculateWeight(Op0N);
1969 } else
1970 Weight = getWeight(Op0N);
1971
1972 SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
1973 if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
1974 Weight += getWeight(balanceSubTree(Op1N).getNode());
1975 // Weight += calculateWeight(Op1N);
1976 } else
1977 Weight += getWeight(Op1N);
1978
1979 RootWeights[N] = Weight;
1980 RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
1981 getHeight(N->getOperand(1).getNode())) + 1;
1982
1983 LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
1984 << " Height=" << RootHeights[N] << "): ");
1985 LLVM_DEBUG(N->dump(CurDAG));
1986
1987 return SDValue(N, 0);
1988 }
1989
1990 LLVM_DEBUG(dbgs() << "** Balancing root node: ");
1991 LLVM_DEBUG(N->dump(CurDAG));
1992
1993 unsigned NOpcode = N->getOpcode();
1994
1995 LeafPrioQueue Leaves(NOpcode);
1996 SmallVector<SDValue, 4> Worklist;
1997 Worklist.push_back(SDValue(N, 0));
1998
1999 // SHL nodes will be converted to MUL nodes
2000 if (NOpcode == ISD::SHL)
2001 NOpcode = ISD::MUL;
2002
2003 bool CanFactorize = false;
2004 WeightedLeaf Mul1, Mul2;
2005 unsigned MaxPowerOf2 = 0;
2006 WeightedLeaf GA;
2007
2008 // Do not try to factor out a shift if there is already a shift at the tip of
2009 // the tree.
2010 bool HaveTopLevelShift = false;
2011 if (TopLevel &&
2012 ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
2013 Op0.getConstantOperandVal(1) < 4) ||
2014 (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
2015 Op1.getConstantOperandVal(1) < 4)))
2016 HaveTopLevelShift = true;
2017
2018 // Flatten the subtree into an ordered list of leaves; at the same time
2019 // determine whether the tree is already balanced.
2020 int InsertionOrder = 0;
2021 SmallDenseMap<SDValue, int> NodeHeights;
2022 bool Imbalanced = false;
2023 int CurrentWeight = 0;
2024 while (!Worklist.empty()) {
2025 SDValue Child = Worklist.pop_back_val();
2026
2027 if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
2028 // CASE 1: Child is a root note
2029
2030 int Weight = RootWeights[Child.getNode()];
2031 if (Weight == -1) {
2032 Child = balanceSubTree(Child.getNode());
2033 // calculateWeight(Child.getNode());
2034 Weight = getWeight(Child.getNode());
2035 } else if (Weight == -2) {
2036 // Whoops, this node was RAUWd by one of the balanceSubTree calls we
2037 // made. Our worklist isn't up to date anymore.
2038 // Restart the whole process.
2039 LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2040 return balanceSubTree(N, TopLevel);
2041 }
2042
2043 NodeHeights[Child] = 1;
2044 CurrentWeight += Weight;
2045
2046 unsigned PowerOf2;
2047 if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
2048 (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
2049 Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
2050 // Try to identify two factorizable MUL/SHL children greedily. Leave
2051 // them out of the priority queue for now so we can deal with them
2052 // after.
2053 if (!Mul1.Value.getNode()) {
2054 Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
2055 MaxPowerOf2 = PowerOf2;
2056 } else {
2057 Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
2058 MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
2059
2060 // Our addressing modes can only shift by a maximum of 3
2061 if (MaxPowerOf2 > 3)
2062 MaxPowerOf2 = 3;
2063
2064 CanFactorize = true;
2065 }
2066 } else
2067 Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2068 } else if (!isOpcodeHandled(Child.getNode())) {
2069 // CASE 2: Child is an unhandled kind of node (e.g. constant)
2070 int Weight = getWeight(Child.getNode());
2071
2072 NodeHeights[Child] = getHeight(Child.getNode());
2073 CurrentWeight += Weight;
2074
2075 if (isTargetConstant(Child) && !GA.Value.getNode())
2076 GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2077 else
2078 Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2079 } else {
2080 // CASE 3: Child is a subtree of same opcode
2081 // Visit children first, then flatten.
2082 unsigned ChildOpcode = Child.getOpcode();
2083 assert(ChildOpcode == NOpcode ||
2084 (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2085
2086 // Convert SHL to MUL
2087 SDValue Op1;
2088 if (ChildOpcode == ISD::SHL)
2089 Op1 = getMultiplierForSHL(Child.getNode());
2090 else
2091 Op1 = Child->getOperand(1);
2092
2093 if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2094 assert(!NodeHeights.count(Child) && "Parent visited before children?");
2095 // Visit children first, then re-visit this node
2096 Worklist.push_back(Child);
2097 Worklist.push_back(Op1);
2098 Worklist.push_back(Child->getOperand(0));
2099 } else {
2100 // Back at this node after visiting the children
2101 if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2102 Imbalanced = true;
2103
2104 NodeHeights[Child] = std::max(NodeHeights[Op1],
2105 NodeHeights[Child->getOperand(0)]) + 1;
2106 }
2107 }
2108 }
2109
2110 LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2111 << " weight=" << CurrentWeight
2112 << " imbalanced=" << Imbalanced << "\n");
2113
2114 // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2115 // This factors out a shift in order to match memw(a<<Y+b).
2116 if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2117 willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2118 LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2119 int Weight = Mul1.Weight + Mul2.Weight;
2120 int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2121 SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2122 SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2123 SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2124 Mul1Factored, Mul2Factored);
2125 SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2126 Mul1.Value.getValueType());
2127 SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2128 Sum, Const);
2129 NodeHeights[New] = Height;
2130 Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2131 } else if (Mul1.Value.getNode()) {
2132 // We failed to factorize two MULs, so now the Muls are left outside the
2133 // queue... add them back.
2134 Leaves.push(Mul1);
2135 if (Mul2.Value.getNode())
2136 Leaves.push(Mul2);
2137 CanFactorize = false;
2138 }
2139
2140 // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2141 // and the root node itself is not used more than twice. This reduces the
2142 // amount of additional constant extenders introduced by this optimization.
2143 bool CombinedGA = false;
2144 if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2145 GA.Value.hasOneUse() && N->use_size() < 3) {
2146 GlobalAddressSDNode *GANode =
2147 cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2148 ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2149
2150 if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2151 getTargetLowering()->isOffsetFoldingLegal(GANode)) {
2152 LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
2153 << Offset->getSExtValue() << "): ");
2154 LLVM_DEBUG(GANode->dump(CurDAG));
2155
2156 SDValue NewTGA =
2157 CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2158 GANode->getValueType(0),
2159 GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2160 GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2161 GA.Value.getValueType(), NewTGA);
2162 GA.Weight += Leaves.top().Weight;
2163
2164 NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2165 CombinedGA = true;
2166
2167 Leaves.pop(); // Remove the offset constant from the queue
2168 }
2169 }
2170
2171 if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2172 (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2173 RootWeights[N] = CurrentWeight;
2174 RootHeights[N] = NodeHeights[SDValue(N, 0)];
2175
2176 return SDValue(N, 0);
2177 }
2178
2179 // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2180 if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2181 WeightedLeaf SHL = Leaves.findSHL(31);
2182 if (SHL.Value.getNode()) {
2183 int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2184 GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2185 GA.Value.getValueType(),
2186 GA.Value, SHL.Value);
2187 GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2188 NodeHeights[GA.Value] = Height;
2189 }
2190 }
2191
2192 if (GA.Value.getNode())
2193 Leaves.push(GA);
2194
2195 // If this is the top level and we haven't factored out a shift, we should try
2196 // to move a constant to the bottom to match addressing modes like memw(rX+C)
2197 if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2198 LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2199 Leaves.pushToBottom(Leaves.pop());
2200 }
2201
2202 const DataLayout &DL = CurDAG->getDataLayout();
2204
2205 // Rebuild the tree using Huffman's algorithm
2206 while (Leaves.size() > 1) {
2207 WeightedLeaf L0 = Leaves.pop();
2208
2209 // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2210 // otherwise just get the next leaf
2211 WeightedLeaf L1 = Leaves.findMULbyConst();
2212 if (!L1.Value.getNode())
2213 L1 = Leaves.pop();
2214
2215 assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2216
2217 SDValue V0 = L0.Value;
2218 int V0Weight = L0.Weight;
2219 SDValue V1 = L1.Value;
2220 int V1Weight = L1.Weight;
2221
2222 // Make sure that none of these nodes have been RAUW'd
2223 if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2224 (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2225 LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2226 return balanceSubTree(N, TopLevel);
2227 }
2228
2229 ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
2230 ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
2231 EVT VT = N->getValueType(0);
2232 SDValue NewNode;
2233
2234 if (V0C && !V1C) {
2235 std::swap(V0, V1);
2236 std::swap(V0C, V1C);
2237 }
2238
2239 // Calculate height of this node
2240 assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2241 "Children must have been visited before re-combining them!");
2242 int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2243
2244 // Rebuild this node (and restore SHL from MUL if needed)
2245 if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2246 NewNode = CurDAG->getNode(
2247 ISD::SHL, SDLoc(V0), VT, V0,
2249 V1C->getAPIntValue().logBase2(), SDLoc(N),
2251 else
2252 NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2253
2254 NodeHeights[NewNode] = Height;
2255
2256 int Weight = V0Weight + V1Weight;
2257 Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2258
2259 LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
2260 << ",Height=" << Height << "):\n");
2261 LLVM_DEBUG(NewNode.dump());
2262 }
2263
2264 assert(Leaves.size() == 1);
2265 SDValue NewRoot = Leaves.top().Value;
2266
2267 assert(NodeHeights.count(NewRoot));
2268 int Height = NodeHeights[NewRoot];
2269
2270 // Restore SHL if we earlier converted it to a MUL
2271 if (NewRoot.getOpcode() == ISD::MUL) {
2272 ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2273 if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2274 EVT VT = NewRoot.getValueType();
2275 SDValue V0 = NewRoot.getOperand(0);
2276 NewRoot = CurDAG->getNode(
2277 ISD::SHL, SDLoc(NewRoot), VT, V0,
2279 V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2281 }
2282 }
2283
2284 if (N != NewRoot.getNode()) {
2285 LLVM_DEBUG(dbgs() << "--> Root is now: ");
2286 LLVM_DEBUG(NewRoot.dump());
2287
2288 // Replace all uses of old root by new root
2289 CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2290 // Mark that we have RAUW'd N
2291 RootWeights[N] = -2;
2292 } else {
2293 LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
2294 }
2295
2296 RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2297 RootHeights[NewRoot.getNode()] = Height;
2298
2299 return NewRoot;
2300}
2301
2302void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2304 SDNode *N = &Node;
2305 if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2306 continue;
2307
2308 SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2309 if (BasePtr.getOpcode() != ISD::ADD)
2310 continue;
2311
2312 // We've already processed this node
2313 if (RootWeights.count(BasePtr.getNode()))
2314 continue;
2315
2316 LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2317 LLVM_DEBUG(N->dump(CurDAG));
2318
2319 // FindRoots
2320 SmallVector<SDNode *, 4> Worklist;
2321
2322 Worklist.push_back(BasePtr.getOperand(0).getNode());
2323 Worklist.push_back(BasePtr.getOperand(1).getNode());
2324
2325 while (!Worklist.empty()) {
2326 SDNode *N = Worklist.pop_back_val();
2327 unsigned Opcode = N->getOpcode();
2328
2329 if (!isOpcodeHandled(N))
2330 continue;
2331
2332 Worklist.push_back(N->getOperand(0).getNode());
2333 Worklist.push_back(N->getOperand(1).getNode());
2334
2335 // Not a root if it has only one use and same opcode as its parent
2336 if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
2337 continue;
2338
2339 // This root node has already been processed
2340 if (RootWeights.count(N))
2341 continue;
2342
2343 RootWeights[N] = -1;
2344 }
2345
2346 // Balance node itself
2347 RootWeights[BasePtr.getNode()] = -1;
2348 SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2349
2350 if (N->getOpcode() == ISD::LOAD)
2351 N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2352 NewBasePtr, N->getOperand(2));
2353 else
2354 N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2355 NewBasePtr, N->getOperand(3));
2356
2357 LLVM_DEBUG(dbgs() << "--> Final node: ");
2358 LLVM_DEBUG(N->dump(CurDAG));
2359 }
2360
2362 GAUsesInFunction.clear();
2363 RootHeights.clear();
2364 RootWeights.clear();
2365}
unsigned SubReg
aarch64 promote const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:478
static bool willShiftRightEliminate(SDValue V, unsigned Amount)
static cl::opt< bool > RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"))
static unsigned getPowerOf2Factor(SDValue Val)
static cl::opt< bool > CheckSingleUse("hexagon-isel-su", cl::Hidden, cl::init(true), cl::desc("Enable checking of SDNode's single-use status"))
static cl::opt< bool > EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), cl::desc("Rebalance address calculation trees to improve " "instruction selection"))
static bool isMemOPCandidate(SDNode *I, SDNode *U)
#define PASS_NAME
#define DEBUG_TYPE
static bool isTargetConstant(const SDValue &V)
static bool isOpcodeHandled(const SDNode *N)
static cl::opt< bool > RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if this allows optimizations"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
#define CH(x, y, z)
Definition: SHA256.cpp:34
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static constexpr uint32_t Opcode
Definition: aarch32.h:200
Class for arbitrary precision integers.
Definition: APInt.h:76
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1433
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1583
unsigned logBase2() const
Definition: APInt.h:1696
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const GlobalValue * getGlobal() const
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool SelectNewCircIntrinsic(SDNode *IntN)
Generate a machine instruction node for the new circular buffer intrinsics.
bool tryLoadOfLoadIntrinsic(LoadSDNode *N)
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl)
MachineSDNode * LoadInstrForLoadIntrinsic(SDNode *IntN)
bool SelectAnyImm2(SDValue &N, SDValue &R)
bool SelectAnyImm(SDValue &N, SDValue &R)
bool SelectAnyImm0(SDValue &N, SDValue &R)
bool SelectAnyImm1(SDValue &N, SDValue &R)
bool DetectUseSxtw(SDValue &N, SDValue &R)
bool SelectBrevLdIntrinsic(SDNode *IntN)
bool SelectAddrFI(SDValue &N, SDValue &R)
SDNode * StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN)
bool SelectAddrGP(SDValue &N, SDValue &R)
bool SelectAnyImmediate(SDValue &N, SDValue &R, Align Alignment)
bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, Align Alignment)
bool SelectAnyImm3(SDValue &N, SDValue &R)
bool SelectAnyInt(SDValue &N, SDValue &R)
bool SelectAddrGA(SDValue &N, SDValue &R)
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions.
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl)
bool isValidAutoIncImm(const EVT VT, const int Offset) const
Hexagon target-specific information for each MachineFunction.
BitVector getReservedRegs(const MachineFunction &MF) const override
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
const HexagonFrameLowering * getFrameLowering() const override
const HexagonRegisterInfo * getRegisterInfo() const override
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
unsigned getVectorLength() const
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
const TargetLowering * TLI
MachineFunction * MF
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:720
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:725
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:539
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:674
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:797
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:741
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
This class is used to represent ISD::STORE nodes.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const
Return the type to use for a scalar shift opcode, given the shifted amount type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1029
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ ExternalSymbol
Definition: ISDOpcodes.h:83
@ BlockAddress
Definition: ISDOpcodes.h:84
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1455
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1486
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1684
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
FunctionPass * createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOptLevel OptLevel)
createHexagonISelDag - This pass converts a legalized DAG into a Hexagon-specific DAG,...
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
@ Default
The result values are uniform if and only if all operands are uniform.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:299
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:160
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:311
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
This class contains a discriminated union of information about pointers in memory operands,...