//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of the container for all the Rules, and of rule search.
/// Fast search handles the most common case, where Rule.Predicate checks the
/// LLT and uniformity of the register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}
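
// For example, p0 (flat) and p1 (global) are both 64-bit pointers, so
// isAnyPtr(Ty, 64) accepts either; the Ptr/UniPtr/DivPtr predicates below use
// this to match a pointer of a given width in any address space.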

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

static bool matchUniformityAndLLT(Register Reg,
                                  UniformityLLTOpPredicateID UniID,
                                  const MachineUniformityInfo &MUI,
                                  const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P2:
    return MRI.getType(Reg) == LLT::pointer(2, 32);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B160:
    return MRI.getType(Reg).getSizeInBits() == 160;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case UniBRC: {
    if (!MUI.isUniform(Reg))
      return false;
    // Check if there is an SGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    // There is no 16-bit SGPR register class. The extra size check is
    // required since getSGPRClassForBitWidth returns SReg_32RegClass for
    // size 16.
    unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
    return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize);
  }
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case DivBRC: {
    if (!MUI.isDivergent(Reg))
      return false;
    // Check if there is a VGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    return TRI->getVGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits());
  }
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}
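
// As an illustration: UniS32 only matches a virtual register whose LLT is s32
// and that MachineUniformityInfo proves uniform, while B64 matches any 64-bit
// type (s64, v2s32, v4s16 or a 64-bit pointer) regardless of uniformity.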

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check the LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (OpUniformityAndTypes[i] == _) {
      assert((!MO.isReg() || !MO.getReg().isVirtual()) &&
             "_ is for non-register and physical register operands only");
      continue;
    }

    // Remaining IDs check registers.
    if (!MO.isReg())
      return false;

    if (!matchUniformityAndLLT(MO.getReg(), OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}
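
// For example, a predicate written as {DivS32, P1} matches an instruction
// whose operand 0 is a divergent s32 and whose operand 1 is a p1 pointer;
// anything more complicated goes through TestFunc, typically built with the
// Predicate combinators defined further below.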

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

static UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

static UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || Ty == LLT::fixed_vector(2, 64) ||
      Ty == LLT::fixed_vector(8, 16) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search the "Fast Rules" first.
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added to
  // every slot that could match the fast predicate. Otherwise the invalid
  // mapping in the empty slot is returned, which results in failure; the
  // "Slow Rules" are not searched as a fallback.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return &Rule.OperandMapping;
  }

  return nullptr;
}
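
// The fast path keys only on operand 0. For example, for a uniform G_AND with
// a 64-bit result registered via .Uni(B64, ...), LLTToBId returns B64 and the
// stored mapping is returned directly, without scanning the Rules vector.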

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = std::move(RuleApplyIDs);
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = std::move(RuleApplyIDs);
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end())
      return nullptr;
    return &IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end())
    return nullptr;
  return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for a predicate lambda that enables '&&', '||' and
// '!'.
class Predicate {
private:
  struct Elt {
    // Stores a formula composed of Pred, '&&', '||' and '!' as a jump table.
    // '!' is sunk onto the Preds: for example, !((A && !B) || C) becomes
    // (!A || B) && !C.
    // Sequences of '&&' and '||' are represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true: jump to B
    //   A == false: jump to the end or to Y; the result is A (false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true: jump to the end or to Y; the result is A (true) or Y
    //   A == false: jump to B
    // Note that negating an expression simply flips Neg on each Pred and
    // swaps TJumpOffset with FJumpOffset ('&&' becomes '||' and vice versa).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is applied before the jump.
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); }

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  }

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result)
        Idx += Expression[Idx].TJumpOffset;
      else
        Idx += Expression[Idx].FJumpOffset;
    } while (Idx != ResultIdx);

    return Result;
  }

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  }

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // If the LHS results in false, the whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // If the LHS results in true, the whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
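
// Usage sketch: predicates compose before rules are registered, e.g.
//   Predicate P = !isAtomicMMO && isUniMMO && (isConst || isInvMMO);
// (compare isUL in the load rules below); evaluating P(MI) then walks the
// combined jump table in a single forward pass.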

// Initialize rules.
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
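
  // Illustration: for a uniform s16 G_ADD, the rule above
  // ({{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}) any-extends both sources to
  // s32, performs the add on the SALU, and truncates the result back to s16;
  // the divergent rule keeps a native 16-bit VALU add instead.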

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  bool HasVecMulU64 = ST->hasVectorMulU64();
  addRulesForGOpcs({G_MUL}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);

  bool hasMulHi = ST->hasScalarMulHiInsts();
  addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);

  addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
      .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
      .Uni(S64, {{UniInVgprS64, UniInVcc}, {Vgpr32, Vgpr32, Vgpr64}});

  bool HasScalarSMulU64 = ST->hasScalarSMulU64();
  addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FSHR}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT and
  // G_FCONSTANT here; the rest are trivially regbankselected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});

  addRulesForGOpcs({G_FREEZE})
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16}}})
      .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
      .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});

  addRulesForGOpcs({G_UNMERGE_VALUES})
      .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
      .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});

  // LOAD       {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD       {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD_NORET {},    {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // STORE      {},    {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
                    G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
                    G_AMDGPU_INTRIN_IMAGE_STORE,
                    G_AMDGPU_INTRIN_IMAGE_STORE_D16})
      .Any({{}, {{}, {}, ApplyINTRIN_IMAGE}});

  Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return CmpInst::isSigned(Pred);
  });

  Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return ICmpInst::isEquality(Pred);
  });

  bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
  // clang-format off
  addRulesForGOpcs({G_ICMP})
      .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
      .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
      .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
      .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
  // clang-format on

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
      .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
      .Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
      .Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}, SplitTo32});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  bool Has16bitCmp = ST->has16BitInsts();

  // In GlobalISel, in-register G_TRUNC is treated as a no-op and is
  // instruction-selected into a COPY; it is up to the user to deal with the
  // truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr32AExt}, VgprToVccCopy}},
           !Has16bitCmp)
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});
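
  // For the divergent s1 cases above, VgprToVccCopy conceptually masks the
  // source down to its low bit and compares it against zero to produce a VCC
  // lane mask.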

  addRulesForGOpcs({G_ZEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 first is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 first is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
      .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  addRulesForGOpcs({G_ASSERT_ALIGN}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64}}});

  // Atomic read-modify-write operations: result and value are always VGPR,
  // pointer varies by address space.
  addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
                    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
                    G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
                    G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
                    G_ATOMICRMW_UDEC_WRAP})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}});

  bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
  bool HasAtomicBufferGlobalPkAddF16Insts =
      ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
      ST->hasAtomicBufferGlobalPkAddF16Insts();
  bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
  addRulesForGOpcs({G_ATOMICRMW_FADD})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
      .Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
           HasAtomicFlatPkAdd16Insts)
      .Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
           HasAtomicBufferGlobalPkAddF16Insts)
      .Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
           HasAtomicDsPkAdd16Insts);

  addRulesForGOpcs({G_ATOMIC_CMPXCHG})
      .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
      .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
      .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, Standard)
      .Div(S32, {{Vgpr32},
                 {Vgpr32, Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64},
                 {Vgpr64, Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
                    G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_SMAX,
                    G_AMDGPU_BUFFER_ATOMIC_SMIN},
                   Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO can be different than the address space on
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);
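
  // isUL approximates "safe for a scalar load": not atomic, a known-uniform
  // MMO, and either constant memory or a non-volatile MMO that is invariant
  // or marked MONoClobber (proven not clobbered by a store).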

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform via global or buffer load, for example volatile or non-aligned
      // uniform load. Not using standard {{UniInVgprTy}, {VgprP1}} since it is
      // selected as global_load; use SgprP1 for the pointer instead to match
      // patterns without flat-for-global, the default for GFX7 and older.
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1); there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use an sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});

  // clang-format on
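
  // Note that the store rules above always put the stored data in VGPRs; only
  // a uniform p1 pointer stays in SGPRs, so both flat-for-global and
  // buffer-store patterns can be selected with a scalar base.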

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
                    G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
      StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
                   StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Any({{DivB160},
            {{VgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{UniB160},
            {{UniInVgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
      StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
                    G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
                    G_AMDGPU_BUFFER_STORE_FORMAT_D16,
                    G_AMDGPU_TBUFFER_STORE_FORMAT,
                    G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
      .Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B96}, {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  // Buffer atomics: the resource descriptor and the scalar offset are SGPR;
  // data and address components are VGPR.
  //
  // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
  //   dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
  //         idxen_imm
  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
      .Any({{S32, S32, V4S32, S32, S32, S32},
            {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{S64, S64, V4S32, S32, S32, S32},
            {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
            {{VgprV2S16},
             {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

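  // The "_WF" mapping IDs (SgprV4S32_WF, Sgpr32_WF) require uniform values;
  // presumably, when the incoming descriptor or offset is divergent, it is
  // made uniform with a readfirstlane/waterfall loop, matching how divergent
  // resource descriptors are handled elsewhere in the backend.
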
  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
  // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
  addRulesForGOpcs({G_PTRMASK})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
      .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_BITREVERSE}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  addRulesForGOpcs({G_SI_CALL})
      .Any({{_, UniP0}, {{None}, {SgprP0}}})
      .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
      .Any({{_, UniP4}, {{None}, {SgprP4}}})
      .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});
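
  // A divergent call target cannot stay in VGPRs; SgprP0Call_WF and
  // SgprP4Call_WF presumably move the pointer into SGPRs (waterfall /
  // readfirstlane) before the call is selected.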

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FMAD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FMA, G_STRICT_FMA}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Uni(V2S16,
           {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
           !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
  // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
  // instructions on SALU.
  addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // FNEG and FABS are either folded as source modifiers or can be selected as
  // bitwise XOR and AND with a mask. XOR and AND are available on the SALU,
  // but for targets without SALU float we still select them on the VALU since
  // there would be no real SGPR use.
  addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
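
  // For example, an s32 fneg is just (xor x, 0x80000000) and fabs is
  // (and x, 0x7fffffff); with SALU float support the uniform case therefore
  // maps naturally onto scalar XOR/AND.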

  addRulesForGOpcs({G_FCANONICALIZE}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  bool hasPST = ST->hasPseudoScalarTrans();
  addRulesForGOpcs({G_FSQRT}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasPST)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasPST);

  addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});

  addRulesForGOpcs({G_UITOFP, G_SITOFP})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});

  addRulesForGOpcs({G_FPEXT})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
      .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
                   Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FPTRUNC})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{UniInVgprV2S16}, {VgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);

1380 addRulesForGOpcs({G_IS_FPCLASS})
1381 .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
1382 .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
1383 .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
1384 .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
1385 .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
1386 .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
1387
1388 addRulesForGOpcs({G_FCMP}, Standard)
1389 .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1390 hasSALUFloat)
1391 .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1392 !hasSALUFloat)
1393 .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
1394 .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1395 hasSALUFloat)
1396 .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1397 !hasSALUFloat)
1398 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
1399 .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1400 .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
1401
  addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
                    G_FEXP2, G_FLOG2},
                   Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  using namespace Intrinsic;

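  // Rules for intrinsics. In the operand lists below, IntrId (or None) marks
  // the intrinsic-ID operand and Imm marks other non-register immediates;
  // neither needs a register bank assigned.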
  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  addRulesForIOpcs({amdgcn_groupstaticsize}).Any({{S32}, {{Sgpr32}, {IntrId}}});

  // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf})
      .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
      .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

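  // mbcnt computes a per-lane count, so its result is inherently divergent.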
  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it comes from call
      // lowering, which inserts a readfirstlane just in case the register
      // is not in an SGPR.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

  addRulesForIOpcs({amdgcn_s_sleep}).Any({{_, _}, {{}, {IntrId, Imm}}});

  addRulesForIOpcs({amdgcn_bitop3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});

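  // The 24-bit multiplies are VALU-only; note that the variants with a
  // 64-bit result still take 32-bit sources.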
  addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_mulhi_u24, amdgcn_mulhi_i24, amdgcn_fmul_legacy},
                   Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_fma_legacy}, Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
      .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
      .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});

  addRulesForIOpcs({amdgcn_prng_b32})
      .Any({{UniS32}, {{UniInVgprS32}, {IntrId, Vgpr32}}})
      .Any({{DivS32}, {{Vgpr32}, {IntrId, Vgpr32}}});

  addRulesForIOpcs({amdgcn_sffbh}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});

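  // Uniform bitfield extracts need custom lowering (S_BFE), presumably to
  // pack offset and width into the single operand s_bfe expects; divergent
  // 64-bit extracts are custom-lowered as well (V_BFE).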
  addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});

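  // Transposed global loads return per-lane data (always divergent) loaded
  // from a uniform global pointer (SgprP1).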
  addRulesForIOpcs({amdgcn_global_load_tr_b64})
      .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
      .Any({{DivB32}, {{VgprB32}, {IntrId, SgprP1}}});

  addRulesForIOpcs({amdgcn_global_load_tr_b128})
      .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
      .Any({{DivB128}, {{VgprB128}, {IntrId, SgprP1}}});

  addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
      .Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});

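  // The buffer-to-LDS loads define no registers. The buffer resource (a
  // v4s32 descriptor or p8 buffer resource) and the LDS base pointer (p3)
  // must be uniform; per-lane offsets live in VGPRs and the scalar offset
  // in an SGPR.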
  addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
      .Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});

  addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
      .Any({{_},
            {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
      .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});

  addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
      .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});

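  // WWM and strict-WWM are bank-preserving copies. The B-typed rules match
  // by size only, so a single rule covers scalars, vectors, and pointers of
  // the same width.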
  addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm}, StandardB)
      .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
      .Uni(B32, {{SgprB32}, {IntrId, SgprB32}})
      .Div(B64, {{VgprB64}, {IntrId, VgprB64}})
      .Uni(B64, {{SgprB64}, {IntrId, SgprB64}})
      .Div(B96, {{VgprB96}, {IntrId, VgprB96}})
      .Uni(B96, {{SgprB96}, {IntrId, SgprB96}})
      .Div(B128, {{VgprB128}, {IntrId, VgprB128}})
      .Uni(B128, {{SgprB128}, {IntrId, SgprB128}})
      .Any({{UniB256}, {{SgprB256}, {IntrId, SgprB256}}})
      .Any({{DivB256}, {{VgprB256}, {IntrId, VgprB256}}})
      .Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
      .Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});

} // end initialize rules