//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of container for all the Rules and search.
/// Fast search for most common case when Rule.Predicate checks LLT and
/// uniformity of register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

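// Return true if Reg has the LLT required by UniID and, for the Uni/Div
// variants, the matching uniformity or divergence according to MUI.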
static bool matchUniformityAndLLT(Register Reg,
                                  UniformityLLTOpPredicateID UniID,
                                  const MachineUniformityInfo &MUI,
                                  const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P2:
    return MRI.getType(Reg) == LLT::pointer(2, 32);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B160:
    return MRI.getType(Reg).getSizeInBits() == 160;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case UniBRC: {
    if (!MUI.isUniform(Reg))
      return false;
    // Check if there is an SGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    // There is no 16-bit SGPR register class. The extra size check is
    // required since getSGPRClassForBitWidth returns SReg_32RegClass for
    // size 16.
    unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
    return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize);
  }
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
  case DivV3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32) && MUI.isDivergent(Reg);
  case DivV4S16:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 16) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case DivBRC: {
    if (!MUI.isDivergent(Reg))
      return false;
    // Check if there is a VGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    return TRI->getVGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits());
  }
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (OpUniformityAndTypes[i] == _) {
      assert((!MI.getOperand(i).isReg() ||
              !MI.getOperand(i).getReg().isVirtual()) &&
             "_ is for non-register and physical register operands only");
      continue;
    }

    // Remaining IDs check registers.
    if (!MO.isReg())
      return false;

    if (!matchUniformityAndLLT(MO.getReg(), OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}

SetOfRulesForOpcode::SetOfRulesForOpcode() = default;

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

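// Map an LLT to the exact-type predicate ID used by the "Fast Rules" tables.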
static UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

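// Map an LLT to a B-type (bit-width bucket) predicate ID: types with the same
// total size share a bucket regardless of scalar/vector/pointer form.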
static UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || Ty == LLT::fixed_vector(2, 64) ||
      Ty == LLT::fixed_vector(8, 16) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added in each
  // slot that could "match the fast Predicate". If not, InvalidMapping is
  // returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return &Rule.OperandMapping;
  }

  return nullptr;
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = std::move(RuleApplyIDs);
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = std::move(RuleApplyIDs);
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}
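
// For example, with Standard fast rules, a uniform 32-bit scalar instruction
// maps operand 0 to slot 0, so findMappingForMI returns Uni[0] directly
// without scanning the slow per-rule predicate list.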

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(const MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end())
      return nullptr;
    return &IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end())
    return nullptr;
  return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and
// '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true  jump to B
    //   A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true  jump to end or Y, result is A(true) or Y
    //   A == false jump to B
    // Notice that when negating an expression, we simply flip Neg on each
    // Pred and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes
    // &&).
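    //
    // For example, combining two single-predicate expressions with '&&'
    // (see operator&& below) yields the jump table
    //   {{A, Neg=false, TJump=1, FJump=2}, {B, Neg=false, TJump=1, FJump=1}}:
    // if A is true, fall through to evaluate B; if A is false, the offset 2
    // jumps past the end and the overall result is A's result (false).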
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump.
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); }

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  }

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while (Idx != ResultIdx);

    return Result;
  }

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  }

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // If the LHS results in false, the whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // If the LHS results in true, the whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};

// Initialize rules.
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

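  // Rule notation: .Uni/.Div(Type, {{DstOpMapping...}, {SrcOpMapping...}
  //                                 [, LoweringMethod]} [, Predicate]).
  // For example, the uniform 16-bit G_ADD rule below any-extends both sources
  // to 32-bit SGPRs, performs the operation on the scalar unit, and truncates
  // the 32-bit result back to 16 bits.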
  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  bool HasVecMulU64 = ST->hasVectorMulU64();
  addRulesForGOpcs({G_MUL}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);

  bool hasMulHi = ST->hasScalarMulHiInsts();
  addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);

  addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
      .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
      .Uni(S64, {{UniInVgprS64, UniInVcc}, {Vgpr32, Vgpr32, Vgpr64}});

  bool HasScalarSMulU64 = ST->hasScalarSMulU64();
  addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FSHR}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_BSWAP}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}});

  addRulesForGOpcs({G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
                    G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
                    G_AMDGPU_RCP_IFLAG},
                   Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT and
  // G_FCONSTANT here; the rest is trivially regbank-selected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});

  addRulesForGOpcs({G_FREEZE})
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16}}})
      .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
      .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});

  addRulesForGOpcs({G_UNMERGE_VALUES})
      .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
      .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});

  addRulesForGOpcs({G_PHI})
      .Any({{UniS1}, {{}, {}, AextToS32InIncomingBlockGPHI}})
      .Any({{UniS16}, {{}, {}, VerifyAllSgprGPHI}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgprGPHI}})
      .Any({{DivBRC}, {{}, {}, VerifyAllSgprOrVgprGPHI}});

  // LOAD       {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD       {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD_NORET {},    {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // STORE      {},    {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
                    G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
                    G_AMDGPU_INTRIN_IMAGE_STORE,
                    G_AMDGPU_INTRIN_IMAGE_STORE_D16})
      .Any({{}, {{}, {}, ApplyINTRIN_IMAGE}});

  Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return CmpInst::isSigned(Pred);
  });

  Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return ICmpInst::isEquality(Pred);
  });

  bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
  // clang-format off
  addRulesForGOpcs({G_ICMP})
      .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
      .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
      .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
      .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
  // clang-format on

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}, Select}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
      .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}, Select})
      .Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
      .Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}, Select});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  bool Has16bitCmp = ST->has16BitInsts();

  // In GlobalISel, an in-register G_TRUNC is treated as a no-op and selected
  // into a COPY. It is up to the user to deal with the truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare
      // instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr32AExt}, VgprToVccCopy}},
           !Has16bitCmp)
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 first is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 first is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
      .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  addRulesForGOpcs({G_ASSERT_ALIGN}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64}}});

  // Atomic read-modify-write operations: result and value are always VGPR,
  // pointer varies by address space.
  addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
                    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
                    G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
                    G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
                    G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}});

  bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
  bool HasAtomicBufferGlobalPkAddF16Insts =
      ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
      ST->hasAtomicBufferGlobalPkAddF16Insts();
  bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
  addRulesForGOpcs({G_ATOMICRMW_FADD})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
      .Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
           HasAtomicFlatPkAdd16Insts)
      .Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
           HasAtomicBufferGlobalPkAddF16Insts)
      .Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
           HasAtomicDsPkAdd16Insts);

  addRulesForGOpcs({G_ATOMIC_CMPXCHG})
      .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
      .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
      .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, Standard)
      .Div(S32, {{Vgpr32},
                 {Vgpr32, Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64},
                 {Vgpr64, Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
                    G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_SMAX,
                    G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_FMAX,
                    G_AMDGPU_BUFFER_ATOMIC_FMIN},
                   Standard)
      .Div(S32, {{Vgpr32},
                 {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO can be different from the address space
    // on the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

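  // "isUL" below: the load qualifies as a uniform load that is safe to lower
  // to a scalar (s_load) access - it is not atomic, its MMO is uniform, and
  // it is either constant or both non-volatile and known not to be clobbered
  // (invariant or marked MONoClobber).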
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform via global or buffer load, for example a volatile or
      // non-aligned uniform load. Not using the standard
      // {{UniInVgprTy}, {VgprP1}} mapping since it is selected as global_load;
      // use SgprP1 for the pointer instead to match patterns without
      // flat-for-global, the default for GFX7 and older.
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use an sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});

  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
                    G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
      StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
                   StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Any({{DivB160},
            {{VgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{UniB160},
            {{UniInVgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
      StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
                    G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
                    G_AMDGPU_BUFFER_STORE_FORMAT_D16,
                    G_AMDGPU_TBUFFER_STORE_FORMAT,
                    G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
      .Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B96}, {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  // Buffer atomics: the resource descriptor and scalar offset are SGPR; data
  // and address components are VGPR.
  //
  // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
  // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
  //        idxen_imm
  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
      .Any({{S32, S32, V4S32, S32, S32, S32},
            {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{S64, S64, V4S32, S32, S32, S32},
            {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
            {{VgprV2S16},
             {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
  // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
  addRulesForGOpcs({G_PTRMASK})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
      .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_BITREVERSE}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  addRulesForGOpcs({G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_UNDEF,
                    G_CTTZ_ZERO_UNDEF})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  addRulesForGOpcs({G_SI_CALL})
      .Any({{_, UniP0}, {{None}, {SgprP0}}})
      .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
      .Any({{_, UniP4}, {{None}, {SgprP4}}})
      .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FMAD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FMA, G_STRICT_FMA}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Uni(V2S16,
           {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
           !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
  // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
  // instructions on SALU.
  addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // FNEG and FABS are either folded as source modifiers or can be selected as
  // bitwise XOR and AND with a mask. XOR and AND are available on the SALU,
  // but for targets without SALU float we still select them on VGPRs since
  // there would be no real SGPR use.
  addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  addRulesForGOpcs({G_FCANONICALIZE}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  bool hasPST = ST->hasPseudoScalarTrans();
  addRulesForGOpcs({G_FSQRT}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasPST)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasPST);

  addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});

  addRulesForGOpcs({G_UITOFP, G_SITOFP})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});

  addRulesForGOpcs({G_FPEXT})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
      .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();

  addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUMinimumMaximumInsts)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUMinimumMaximumInsts)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUMinimumMaximumInsts)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUMinimumMaximumInsts)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
                   Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

1408 addRulesForGOpcs({G_FPTRUNC})
1409 .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
1410 .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
1411 .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
1412 .Any({{UniV2S16, V2S32}, {{UniInVgprV2S16}, {VgprV2S32}}})
1413 .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
1414 .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
1415 .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
1416
1417 addRulesForGOpcs({G_IS_FPCLASS})
1418 .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
1419 .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
1420 .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
1421 .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
1422 .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
1423 .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
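// Divergent i1 results live in the Vcc bank (a lane mask); UniInVcc still
// runs the check on the VALU with VGPR inputs, then moves the lane-mask
// result into a scalar boolean.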
1424
1425 addRulesForGOpcs({G_FCMP}, Standard)
1426 .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1427 hasSALUFloat)
1428 .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1429 !hasSALUFloat)
1430 .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
1431 .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1432 hasSALUFloat)
1433 .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1434 !hasSALUFloat)
1435 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
1436 .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1437 .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
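// Sgpr32Trunc: uniform s1 values are handled as 32-bit SGPRs throughout
// this pass, so a scalar compare produces its def as Sgpr32 and truncates
// back to s1.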
1438
1439 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1440 G_FEXP2, G_FLOG2},
1441 Standard)
1442 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
1443 .Div(S16, {{Vgpr16}, {Vgpr16}})
1444 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
1445 .Div(S32, {{Vgpr32}, {Vgpr32}})
1446 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1447 .Div(S64, {{Vgpr64}, {Vgpr64}});
1448
1449 using namespace Intrinsic;
1450
1451 addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
1452
1453 addRulesForIOpcs({amdgcn_s_getreg}).Any({{}, {{Sgpr32}, {IntrId, Imm}}});
1454
1455 addRulesForIOpcs({amdgcn_s_setreg})
1456 .Any({{_, _, S32}, {{}, {IntrId, Imm, SgprB32_ReadFirstLane}}});
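// IntrId and Imm mark non-register operands (the intrinsic ID and
// immediates) that are left as-is; SgprB32_ReadFirstLane inserts a
// readfirstlane so the written value ends up in an SGPR.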
1457
1458 addRulesForIOpcs({amdgcn_groupstaticsize}).Any({{S32}, {{Sgpr32}, {IntrId}}});
1459
1460 // Intrinsics with no register operands.
1461 addRulesForIOpcs({amdgcn_endpgm,
1462 amdgcn_s_barrier,
1463 amdgcn_s_barrier_signal,
1464 amdgcn_s_barrier_wait,
1465 amdgcn_s_nop,
1466 amdgcn_s_sethalt,
1467 amdgcn_s_setprio,
1468 amdgcn_s_sleep,
1469 amdgcn_s_wait_asynccnt,
1470 amdgcn_s_wait_bvhcnt,
1471 amdgcn_s_wait_dscnt,
1472 amdgcn_s_wait_event,
1473 amdgcn_s_wait_event_export_ready,
1474 amdgcn_s_wait_expcnt,
1475 amdgcn_s_wait_kmcnt,
1476 amdgcn_s_wait_loadcnt,
1477 amdgcn_s_wait_samplecnt,
1478 amdgcn_s_wait_storecnt,
1479 amdgcn_s_wait_tensorcnt,
1480 amdgcn_s_waitcnt,
1481 amdgcn_wave_barrier})
1482 .Any({{}, {{}, {}}});
1483
1484 // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
1485 addRulesForIOpcs({amdgcn_end_cf})
1486 .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
1487 .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});
1488
1489 addRulesForIOpcs({amdgcn_if_break}, Standard)
1490 .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1491 .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
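// Lane masks are 32-bit in wave32 and 64-bit in wave64, hence the S32 and
// S64 variants here and for end_cf above.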
1492
1493 addRulesForIOpcs({amdgcn_exp})
1494 .Any({{_, _, _, S32, S32, S32, S32},
1495 {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
1496
1497 addRulesForIOpcs({amdgcn_exp_row})
1498 .Any({{_, _, _, S32, S32, S32, S32, _, S32},
1499 {{},
1500 {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm,
1501 SgprB32_M0}}});
1502
1503 addRulesForIOpcs({amdgcn_lds_param_load}, Standard)
1504 .Div(S32, {{Vgpr32}, {IntrId, Imm, Imm, SgprB32_M0}});
1505
1506 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
1507 .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1508
1509 addRulesForIOpcs({amdgcn_readfirstlane})
1510 .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
1511 // This should not exist in the first place; it comes from call lowering,
1512 // which readfirstlanes the register in case it is not already in an SGPR.
1513 .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
1514
1515 addRulesForIOpcs({amdgcn_wave_reduce_umax, amdgcn_wave_reduce_umin}, Standard)
1516 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1517 .Div(S32, {{Sgpr32ToVgprDst}, {IntrId, VgprB32}})
1518 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64}})
1519 .Div(S64, {{Sgpr64ToVgprDst}, {IntrId, VgprB64}});
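// A wave reduction yields the same value in every lane, so the result is
// computed in an SGPR even for divergent inputs; Sgpr32ToVgprDst and
// Sgpr64ToVgprDst then copy it into the VGPR def that divergent uses
// expect.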
1520
1521 addRulesForIOpcs({amdgcn_bitop3}, Standard)
1522 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1523 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1524 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1525 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1526
1527 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
1528 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1529 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
1530 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
1531 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
1532
1533 addRulesForIOpcs({amdgcn_mulhi_u24, amdgcn_mulhi_i24, amdgcn_fmul_legacy},
1534 Standard)
1535 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1536 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1537
1538 addRulesForIOpcs({amdgcn_fma_legacy}, Standard)
1539 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1540 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1541
1542 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
1543 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1544 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1545 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1546 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1547 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1548 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1549
1550 addRulesForIOpcs({amdgcn_prng_b32})
1551 .Any({{UniS32}, {{UniInVgprS32}, {IntrId, Vgpr32}}})
1552 .Any({{DivS32}, {{Vgpr32}, {IntrId, Vgpr32}}});
1553
1554 addRulesForIOpcs({amdgcn_sffbh}, Standard)
1555 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1556 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1557
1558 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
1559 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1560 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
1561 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
1562 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
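// S_BFE and V_BFE name the custom lowerings: the scalar s_bfe
// instructions take offset and width packed into one source operand, and
// there is no 64-bit VALU bfe, so the divergent S64 case is expanded in
// terms of 32-bit operations.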
1563
1564 addRulesForIOpcs({amdgcn_cvt_pk_u16, amdgcn_cvt_pk_i16, amdgcn_cvt_pkrtz},
1565 Standard)
1566 .Div(V2S16, {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
1567 .Uni(V2S16, {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
1568
1569 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1570 .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
1571 .Any({{DivB32}, {{VgprB32}, {IntrId, SgprP1}}});
1572
1573 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1574 .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
1575 .Any({{DivB128}, {{VgprB128}, {IntrId, SgprP1}}});
1576
1577 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1578 .Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
1579
1580 addRulesForIOpcs(
1581 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, Standard)
1582 .Div(S32, {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
1583
1584 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1585 Standard)
1586 .Div(S32, {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
1587
1588 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1589 .Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
1590
1591 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1592 .Any({{_},
1593 {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1594
1595 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1596 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});
1597
1598 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1599 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
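// The *buffer_load_lds intrinsics write straight to LDS and define no
// registers; the resource (SgprV4S32 or SgprP8), the LDS pointer (SgprP3)
// and soffset stay scalar, while the vindex/voffset operands are VGPRs.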
1600
1601 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1602 amdgcn_strict_wqm},
1603 StandardB)
1604 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
1605 .Uni(B32, {{SgprB32}, {IntrId, SgprB32}})
1606 .Div(B64, {{VgprB64}, {IntrId, VgprB64}})
1607 .Uni(B64, {{SgprB64}, {IntrId, SgprB64}})
1608 .Div(B96, {{VgprB96}, {IntrId, VgprB96}})
1609 .Uni(B96, {{SgprB96}, {IntrId, SgprB96}})
1610 .Div(B128, {{VgprB128}, {IntrId, VgprB128}})
1611 .Uni(B128, {{SgprB128}, {IntrId, SgprB128}})
1612 .Any({{UniB256}, {{SgprB256}, {IntrId, SgprB256}}})
1613 .Any({{DivB256}, {{VgprB256}, {IntrId, VgprB256}}})
1614 .Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
1615 .Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});
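// StandardB rules match on size in bits only, so one rule per width (B32
// through B512) covers scalars, vectors and pointers alike.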
1616
1617 addRulesForIOpcs({amdgcn_sin, amdgcn_cos}, Standard)
1618 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1619 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1620 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1621 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}});
1622
1623 addRulesForIOpcs(
1624 {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, Standard)
1625 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}});
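// The BVH stack intrinsics return two values (the popped value and the
// updated stack address), hence the two entries in each dst mapping here
// and in the push8 variants below.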
1626
1627 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop1_rtn}, Standard)
1628 .Div(S32, {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
1629
1630 addRulesForIOpcs({amdgcn_ds_bvh_stack_push8_pop2_rtn}, Standard)
1631 .Div(S64, {{Vgpr64, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
1632
1633 addRulesForIOpcs({amdgcn_ds_swizzle}, Standard)
1634 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1635 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1636
1637 addRulesForIOpcs({amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
1638 .Any({{DivV2S32}, {{VgprV2S32}, {IntrId, VgprP3}}});
1639
1640 addRulesForIOpcs({amdgcn_ds_read_tr6_b96})
1641 .Any({{DivV3S32}, {{VgprV3S32}, {IntrId, VgprP3}}});
1642
1643 addRulesForIOpcs({amdgcn_ds_read_tr16_b64})
1644 .Any({{DivV4S16}, {{VgprV4S16}, {IntrId, VgprP3}}});
1645
1646} // end initialize rules